sqlglot.parser
# sqlglot.parser -- module imports, expression-builder helpers, and the
# Parser metaclass. The build_* functions below construct AST nodes from
# the flat argument lists produced while parsing SQL function calls.
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

# Option name -> allowed values; each value may itself be a sequence of
# tokens that must appear together.
OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a MAP expression from a flat ``[k1, v1, k2, v2, ...]`` list.

    A single star argument (``MAP(*)``) produces a ``StarMap`` instead of a
    ``VarMap``.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value -- walk them pairwise.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE node from ``LIKE(pattern, value[, escape])``.

    Note the argument order: the pattern comes first in the call but becomes
    the ``expression`` of the resulting node. A third argument, when present,
    wraps the result in an ``Escape`` node.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-parser callback producing ``expr_type(this, <bitwise>)``.

    When ``reverse_args`` is True the operands are swapped, for operators
    whose SQL operand order is the mirror of the AST's.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        # _parse_escape handles a trailing ESCAPE clause, if any.
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG node, honoring the dialect's argument order and defaults."""
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    # Single-argument LOG means natural log in some dialects.
    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build HEX, using the lowercase variant when the dialect requires it."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON-extraction functions taking a path argument.

    The second argument is converted to the dialect's JSON-path representation.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # JSON_EXTRACT (only) may accept additional paths past the first two args.
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    """Build a MOD node, parenthesizing binary operands to keep precedence."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True) -> exp.Pad:
    """Build an LPAD/RPAD node; ``is_left`` selects the padding side."""
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array constructor, recording bracket notation when relevant."""
    array_exp = exp_class(expressions=args)

    # Some dialects distinguish ARRAY[..] from ARRAY(..); remember which was used.
    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE, filling in a default source zone for 2-arg calls."""
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True) -> exp.Trim:
    """Build LTRIM/RTRIM as a TRIM with LEADING or TRAILING position."""
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    """Build COALESCE; ``is_nvl`` marks the NVL spelling for round-tripping."""
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List) -> exp.StrPosition:
    """Build StrPosition from LOCATE/CHARINDEX order: (substr, string[, pos])."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    """Metaclass that precomputes word tries for multi-word SHOW/SET statements.

    The tries let the parser match multi-token statement prefixes (e.g.
    ``SHOW GLOBAL STATUS``) without backtracking.
    """

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
") for key in klass.SHOW_PARSERS) 169 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 170 171 return klass 172 173 174class Parser(metaclass=_Parser): 175 """ 176 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 177 178 Args: 179 error_level: The desired error level. 180 Default: ErrorLevel.IMMEDIATE 181 error_message_context: The amount of context to capture from a query string when displaying 182 the error message (in number of characters). 183 Default: 100 184 max_errors: Maximum number of error messages to include in a raised ParseError. 185 This is only relevant if error_level is ErrorLevel.RAISE. 186 Default: 3 187 """ 188 189 FUNCTIONS: t.Dict[str, t.Callable] = { 190 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 191 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 192 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 193 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 194 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 195 ), 196 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 197 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 198 ), 199 "CHAR": lambda args: exp.Chr(expressions=args), 200 "CHR": lambda args: exp.Chr(expressions=args), 201 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 202 "CONCAT": lambda args, dialect: exp.Concat( 203 expressions=args, 204 safe=not dialect.STRICT_STRING_CONCAT, 205 coalesce=dialect.CONCAT_COALESCE, 206 ), 207 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 208 expressions=args, 209 safe=not dialect.STRICT_STRING_CONCAT, 210 coalesce=dialect.CONCAT_COALESCE, 211 ), 212 "CONVERT_TIMEZONE": build_convert_timezone, 213 "DATE_TO_DATE_STR": lambda args: exp.Cast( 214 this=seq_get(args, 0), 215 to=exp.DataType(this=exp.DataType.Type.TEXT), 216 ), 217 "GENERATE_DATE_ARRAY": lambda 
args: exp.GenerateDateArray( 218 start=seq_get(args, 0), 219 end=seq_get(args, 1), 220 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 221 ), 222 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 223 "HEX": build_hex, 224 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 225 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 226 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 227 "LIKE": build_like, 228 "LOG": build_logarithm, 229 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 230 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 231 "LOWER": build_lower, 232 "LPAD": lambda args: build_pad(args), 233 "LEFTPAD": lambda args: build_pad(args), 234 "LTRIM": lambda args: build_trim(args), 235 "MOD": build_mod, 236 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 237 "RPAD": lambda args: build_pad(args, is_left=False), 238 "RTRIM": lambda args: build_trim(args, is_left=False), 239 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 240 if len(args) != 2 241 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 242 "STRPOS": exp.StrPosition.from_arg_list, 243 "CHARINDEX": lambda args: build_locate_strposition(args), 244 "INSTR": exp.StrPosition.from_arg_list, 245 "LOCATE": lambda args: build_locate_strposition(args), 246 "TIME_TO_TIME_STR": lambda args: exp.Cast( 247 this=seq_get(args, 0), 248 to=exp.DataType(this=exp.DataType.Type.TEXT), 249 ), 250 "TO_HEX": build_hex, 251 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 252 this=exp.Cast( 253 this=seq_get(args, 0), 254 to=exp.DataType(this=exp.DataType.Type.TEXT), 255 ), 256 start=exp.Literal.number(1), 257 length=exp.Literal.number(10), 258 ), 259 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 260 "UPPER": 
build_upper, 261 "VAR_MAP": build_var_map, 262 } 263 264 NO_PAREN_FUNCTIONS = { 265 TokenType.CURRENT_DATE: exp.CurrentDate, 266 TokenType.CURRENT_DATETIME: exp.CurrentDate, 267 TokenType.CURRENT_TIME: exp.CurrentTime, 268 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 269 TokenType.CURRENT_USER: exp.CurrentUser, 270 } 271 272 STRUCT_TYPE_TOKENS = { 273 TokenType.NESTED, 274 TokenType.OBJECT, 275 TokenType.STRUCT, 276 TokenType.UNION, 277 } 278 279 NESTED_TYPE_TOKENS = { 280 TokenType.ARRAY, 281 TokenType.LIST, 282 TokenType.LOWCARDINALITY, 283 TokenType.MAP, 284 TokenType.NULLABLE, 285 TokenType.RANGE, 286 *STRUCT_TYPE_TOKENS, 287 } 288 289 ENUM_TYPE_TOKENS = { 290 TokenType.DYNAMIC, 291 TokenType.ENUM, 292 TokenType.ENUM8, 293 TokenType.ENUM16, 294 } 295 296 AGGREGATE_TYPE_TOKENS = { 297 TokenType.AGGREGATEFUNCTION, 298 TokenType.SIMPLEAGGREGATEFUNCTION, 299 } 300 301 TYPE_TOKENS = { 302 TokenType.BIT, 303 TokenType.BOOLEAN, 304 TokenType.TINYINT, 305 TokenType.UTINYINT, 306 TokenType.SMALLINT, 307 TokenType.USMALLINT, 308 TokenType.INT, 309 TokenType.UINT, 310 TokenType.BIGINT, 311 TokenType.UBIGINT, 312 TokenType.INT128, 313 TokenType.UINT128, 314 TokenType.INT256, 315 TokenType.UINT256, 316 TokenType.MEDIUMINT, 317 TokenType.UMEDIUMINT, 318 TokenType.FIXEDSTRING, 319 TokenType.FLOAT, 320 TokenType.DOUBLE, 321 TokenType.UDOUBLE, 322 TokenType.CHAR, 323 TokenType.NCHAR, 324 TokenType.VARCHAR, 325 TokenType.NVARCHAR, 326 TokenType.BPCHAR, 327 TokenType.TEXT, 328 TokenType.MEDIUMTEXT, 329 TokenType.LONGTEXT, 330 TokenType.BLOB, 331 TokenType.MEDIUMBLOB, 332 TokenType.LONGBLOB, 333 TokenType.BINARY, 334 TokenType.VARBINARY, 335 TokenType.JSON, 336 TokenType.JSONB, 337 TokenType.INTERVAL, 338 TokenType.TINYBLOB, 339 TokenType.TINYTEXT, 340 TokenType.TIME, 341 TokenType.TIMETZ, 342 TokenType.TIMESTAMP, 343 TokenType.TIMESTAMP_S, 344 TokenType.TIMESTAMP_MS, 345 TokenType.TIMESTAMP_NS, 346 TokenType.TIMESTAMPTZ, 347 TokenType.TIMESTAMPLTZ, 348 
TokenType.TIMESTAMPNTZ, 349 TokenType.DATETIME, 350 TokenType.DATETIME2, 351 TokenType.DATETIME64, 352 TokenType.SMALLDATETIME, 353 TokenType.DATE, 354 TokenType.DATE32, 355 TokenType.INT4RANGE, 356 TokenType.INT4MULTIRANGE, 357 TokenType.INT8RANGE, 358 TokenType.INT8MULTIRANGE, 359 TokenType.NUMRANGE, 360 TokenType.NUMMULTIRANGE, 361 TokenType.TSRANGE, 362 TokenType.TSMULTIRANGE, 363 TokenType.TSTZRANGE, 364 TokenType.TSTZMULTIRANGE, 365 TokenType.DATERANGE, 366 TokenType.DATEMULTIRANGE, 367 TokenType.DECIMAL, 368 TokenType.DECIMAL32, 369 TokenType.DECIMAL64, 370 TokenType.DECIMAL128, 371 TokenType.DECIMAL256, 372 TokenType.UDECIMAL, 373 TokenType.BIGDECIMAL, 374 TokenType.UUID, 375 TokenType.GEOGRAPHY, 376 TokenType.GEOMETRY, 377 TokenType.POINT, 378 TokenType.RING, 379 TokenType.LINESTRING, 380 TokenType.MULTILINESTRING, 381 TokenType.POLYGON, 382 TokenType.MULTIPOLYGON, 383 TokenType.HLLSKETCH, 384 TokenType.HSTORE, 385 TokenType.PSEUDO_TYPE, 386 TokenType.SUPER, 387 TokenType.SERIAL, 388 TokenType.SMALLSERIAL, 389 TokenType.BIGSERIAL, 390 TokenType.XML, 391 TokenType.YEAR, 392 TokenType.USERDEFINED, 393 TokenType.MONEY, 394 TokenType.SMALLMONEY, 395 TokenType.ROWVERSION, 396 TokenType.IMAGE, 397 TokenType.VARIANT, 398 TokenType.VECTOR, 399 TokenType.OBJECT, 400 TokenType.OBJECT_IDENTIFIER, 401 TokenType.INET, 402 TokenType.IPADDRESS, 403 TokenType.IPPREFIX, 404 TokenType.IPV4, 405 TokenType.IPV6, 406 TokenType.UNKNOWN, 407 TokenType.NULL, 408 TokenType.NAME, 409 TokenType.TDIGEST, 410 TokenType.DYNAMIC, 411 *ENUM_TYPE_TOKENS, 412 *NESTED_TYPE_TOKENS, 413 *AGGREGATE_TYPE_TOKENS, 414 } 415 416 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 417 TokenType.BIGINT: TokenType.UBIGINT, 418 TokenType.INT: TokenType.UINT, 419 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 420 TokenType.SMALLINT: TokenType.USMALLINT, 421 TokenType.TINYINT: TokenType.UTINYINT, 422 TokenType.DECIMAL: TokenType.UDECIMAL, 423 TokenType.DOUBLE: TokenType.UDOUBLE, 424 } 425 426 SUBQUERY_PREDICATES = { 427 
TokenType.ANY: exp.Any, 428 TokenType.ALL: exp.All, 429 TokenType.EXISTS: exp.Exists, 430 TokenType.SOME: exp.Any, 431 } 432 433 RESERVED_TOKENS = { 434 *Tokenizer.SINGLE_TOKENS.values(), 435 TokenType.SELECT, 436 } - {TokenType.IDENTIFIER} 437 438 DB_CREATABLES = { 439 TokenType.DATABASE, 440 TokenType.DICTIONARY, 441 TokenType.MODEL, 442 TokenType.NAMESPACE, 443 TokenType.SCHEMA, 444 TokenType.SEQUENCE, 445 TokenType.SINK, 446 TokenType.SOURCE, 447 TokenType.STORAGE_INTEGRATION, 448 TokenType.STREAMLIT, 449 TokenType.TABLE, 450 TokenType.TAG, 451 TokenType.VIEW, 452 TokenType.WAREHOUSE, 453 } 454 455 CREATABLES = { 456 TokenType.COLUMN, 457 TokenType.CONSTRAINT, 458 TokenType.FOREIGN_KEY, 459 TokenType.FUNCTION, 460 TokenType.INDEX, 461 TokenType.PROCEDURE, 462 *DB_CREATABLES, 463 } 464 465 ALTERABLES = { 466 TokenType.INDEX, 467 TokenType.TABLE, 468 TokenType.VIEW, 469 } 470 471 # Tokens that can represent identifiers 472 ID_VAR_TOKENS = { 473 TokenType.ALL, 474 TokenType.ATTACH, 475 TokenType.VAR, 476 TokenType.ANTI, 477 TokenType.APPLY, 478 TokenType.ASC, 479 TokenType.ASOF, 480 TokenType.AUTO_INCREMENT, 481 TokenType.BEGIN, 482 TokenType.BPCHAR, 483 TokenType.CACHE, 484 TokenType.CASE, 485 TokenType.COLLATE, 486 TokenType.COMMAND, 487 TokenType.COMMENT, 488 TokenType.COMMIT, 489 TokenType.CONSTRAINT, 490 TokenType.COPY, 491 TokenType.CUBE, 492 TokenType.CURRENT_SCHEMA, 493 TokenType.DEFAULT, 494 TokenType.DELETE, 495 TokenType.DESC, 496 TokenType.DESCRIBE, 497 TokenType.DETACH, 498 TokenType.DICTIONARY, 499 TokenType.DIV, 500 TokenType.END, 501 TokenType.EXECUTE, 502 TokenType.EXPORT, 503 TokenType.ESCAPE, 504 TokenType.FALSE, 505 TokenType.FIRST, 506 TokenType.FILTER, 507 TokenType.FINAL, 508 TokenType.FORMAT, 509 TokenType.FULL, 510 TokenType.IDENTIFIER, 511 TokenType.IS, 512 TokenType.ISNULL, 513 TokenType.INTERVAL, 514 TokenType.KEEP, 515 TokenType.KILL, 516 TokenType.LEFT, 517 TokenType.LIMIT, 518 TokenType.LOAD, 519 TokenType.MERGE, 520 
TokenType.NATURAL, 521 TokenType.NEXT, 522 TokenType.OFFSET, 523 TokenType.OPERATOR, 524 TokenType.ORDINALITY, 525 TokenType.OVERLAPS, 526 TokenType.OVERWRITE, 527 TokenType.PARTITION, 528 TokenType.PERCENT, 529 TokenType.PIVOT, 530 TokenType.PRAGMA, 531 TokenType.PUT, 532 TokenType.RANGE, 533 TokenType.RECURSIVE, 534 TokenType.REFERENCES, 535 TokenType.REFRESH, 536 TokenType.RENAME, 537 TokenType.REPLACE, 538 TokenType.RIGHT, 539 TokenType.ROLLUP, 540 TokenType.ROW, 541 TokenType.ROWS, 542 TokenType.SEMI, 543 TokenType.SET, 544 TokenType.SETTINGS, 545 TokenType.SHOW, 546 TokenType.TEMPORARY, 547 TokenType.TOP, 548 TokenType.TRUE, 549 TokenType.TRUNCATE, 550 TokenType.UNIQUE, 551 TokenType.UNNEST, 552 TokenType.UNPIVOT, 553 TokenType.UPDATE, 554 TokenType.USE, 555 TokenType.VOLATILE, 556 TokenType.WINDOW, 557 *CREATABLES, 558 *SUBQUERY_PREDICATES, 559 *TYPE_TOKENS, 560 *NO_PAREN_FUNCTIONS, 561 } 562 ID_VAR_TOKENS.remove(TokenType.UNION) 563 564 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 565 TokenType.ANTI, 566 TokenType.APPLY, 567 TokenType.ASOF, 568 TokenType.FULL, 569 TokenType.LEFT, 570 TokenType.LOCK, 571 TokenType.NATURAL, 572 TokenType.RIGHT, 573 TokenType.SEMI, 574 TokenType.WINDOW, 575 } 576 577 ALIAS_TOKENS = ID_VAR_TOKENS 578 579 ARRAY_CONSTRUCTORS = { 580 "ARRAY": exp.Array, 581 "LIST": exp.List, 582 } 583 584 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 585 586 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 587 588 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 589 590 FUNC_TOKENS = { 591 TokenType.COLLATE, 592 TokenType.COMMAND, 593 TokenType.CURRENT_DATE, 594 TokenType.CURRENT_DATETIME, 595 TokenType.CURRENT_SCHEMA, 596 TokenType.CURRENT_TIMESTAMP, 597 TokenType.CURRENT_TIME, 598 TokenType.CURRENT_USER, 599 TokenType.FILTER, 600 TokenType.FIRST, 601 TokenType.FORMAT, 602 TokenType.GLOB, 603 TokenType.IDENTIFIER, 604 TokenType.INDEX, 605 TokenType.ISNULL, 606 TokenType.ILIKE, 607 TokenType.INSERT, 608 TokenType.LIKE, 609 
TokenType.MERGE, 610 TokenType.NEXT, 611 TokenType.OFFSET, 612 TokenType.PRIMARY_KEY, 613 TokenType.RANGE, 614 TokenType.REPLACE, 615 TokenType.RLIKE, 616 TokenType.ROW, 617 TokenType.UNNEST, 618 TokenType.VAR, 619 TokenType.LEFT, 620 TokenType.RIGHT, 621 TokenType.SEQUENCE, 622 TokenType.DATE, 623 TokenType.DATETIME, 624 TokenType.TABLE, 625 TokenType.TIMESTAMP, 626 TokenType.TIMESTAMPTZ, 627 TokenType.TRUNCATE, 628 TokenType.WINDOW, 629 TokenType.XOR, 630 *TYPE_TOKENS, 631 *SUBQUERY_PREDICATES, 632 } 633 634 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 635 TokenType.AND: exp.And, 636 } 637 638 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 639 TokenType.COLON_EQ: exp.PropertyEQ, 640 } 641 642 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 643 TokenType.OR: exp.Or, 644 } 645 646 EQUALITY = { 647 TokenType.EQ: exp.EQ, 648 TokenType.NEQ: exp.NEQ, 649 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 650 } 651 652 COMPARISON = { 653 TokenType.GT: exp.GT, 654 TokenType.GTE: exp.GTE, 655 TokenType.LT: exp.LT, 656 TokenType.LTE: exp.LTE, 657 } 658 659 BITWISE = { 660 TokenType.AMP: exp.BitwiseAnd, 661 TokenType.CARET: exp.BitwiseXor, 662 TokenType.PIPE: exp.BitwiseOr, 663 } 664 665 TERM = { 666 TokenType.DASH: exp.Sub, 667 TokenType.PLUS: exp.Add, 668 TokenType.MOD: exp.Mod, 669 TokenType.COLLATE: exp.Collate, 670 } 671 672 FACTOR = { 673 TokenType.DIV: exp.IntDiv, 674 TokenType.LR_ARROW: exp.Distance, 675 TokenType.SLASH: exp.Div, 676 TokenType.STAR: exp.Mul, 677 } 678 679 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 680 681 TIMES = { 682 TokenType.TIME, 683 TokenType.TIMETZ, 684 } 685 686 TIMESTAMPS = { 687 TokenType.TIMESTAMP, 688 TokenType.TIMESTAMPTZ, 689 TokenType.TIMESTAMPLTZ, 690 *TIMES, 691 } 692 693 SET_OPERATIONS = { 694 TokenType.UNION, 695 TokenType.INTERSECT, 696 TokenType.EXCEPT, 697 } 698 699 JOIN_METHODS = { 700 TokenType.ASOF, 701 TokenType.NATURAL, 702 TokenType.POSITIONAL, 703 } 704 705 JOIN_SIDES = { 706 
TokenType.LEFT, 707 TokenType.RIGHT, 708 TokenType.FULL, 709 } 710 711 JOIN_KINDS = { 712 TokenType.ANTI, 713 TokenType.CROSS, 714 TokenType.INNER, 715 TokenType.OUTER, 716 TokenType.SEMI, 717 TokenType.STRAIGHT_JOIN, 718 } 719 720 JOIN_HINTS: t.Set[str] = set() 721 722 LAMBDAS = { 723 TokenType.ARROW: lambda self, expressions: self.expression( 724 exp.Lambda, 725 this=self._replace_lambda( 726 self._parse_assignment(), 727 expressions, 728 ), 729 expressions=expressions, 730 ), 731 TokenType.FARROW: lambda self, expressions: self.expression( 732 exp.Kwarg, 733 this=exp.var(expressions[0].name), 734 expression=self._parse_assignment(), 735 ), 736 } 737 738 COLUMN_OPERATORS = { 739 TokenType.DOT: None, 740 TokenType.DOTCOLON: lambda self, this, to: self.expression( 741 exp.JSONCast, 742 this=this, 743 to=to, 744 ), 745 TokenType.DCOLON: lambda self, this, to: self.expression( 746 exp.Cast if self.STRICT_CAST else exp.TryCast, 747 this=this, 748 to=to, 749 ), 750 TokenType.ARROW: lambda self, this, path: self.expression( 751 exp.JSONExtract, 752 this=this, 753 expression=self.dialect.to_json_path(path), 754 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 755 ), 756 TokenType.DARROW: lambda self, this, path: self.expression( 757 exp.JSONExtractScalar, 758 this=this, 759 expression=self.dialect.to_json_path(path), 760 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 761 ), 762 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 763 exp.JSONBExtract, 764 this=this, 765 expression=path, 766 ), 767 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 768 exp.JSONBExtractScalar, 769 this=this, 770 expression=path, 771 ), 772 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 773 exp.JSONBContains, 774 this=this, 775 expression=key, 776 ), 777 } 778 779 EXPRESSION_PARSERS = { 780 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 781 exp.Column: lambda self: self._parse_column(), 782 exp.Condition: 
lambda self: self._parse_assignment(), 783 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 784 exp.Expression: lambda self: self._parse_expression(), 785 exp.From: lambda self: self._parse_from(joins=True), 786 exp.Group: lambda self: self._parse_group(), 787 exp.Having: lambda self: self._parse_having(), 788 exp.Hint: lambda self: self._parse_hint_body(), 789 exp.Identifier: lambda self: self._parse_id_var(), 790 exp.Join: lambda self: self._parse_join(), 791 exp.Lambda: lambda self: self._parse_lambda(), 792 exp.Lateral: lambda self: self._parse_lateral(), 793 exp.Limit: lambda self: self._parse_limit(), 794 exp.Offset: lambda self: self._parse_offset(), 795 exp.Order: lambda self: self._parse_order(), 796 exp.Ordered: lambda self: self._parse_ordered(), 797 exp.Properties: lambda self: self._parse_properties(), 798 exp.Qualify: lambda self: self._parse_qualify(), 799 exp.Returning: lambda self: self._parse_returning(), 800 exp.Select: lambda self: self._parse_select(), 801 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 802 exp.Table: lambda self: self._parse_table_parts(), 803 exp.TableAlias: lambda self: self._parse_table_alias(), 804 exp.Tuple: lambda self: self._parse_value(), 805 exp.Whens: lambda self: self._parse_when_matched(), 806 exp.Where: lambda self: self._parse_where(), 807 exp.Window: lambda self: self._parse_named_window(), 808 exp.With: lambda self: self._parse_with(), 809 "JOIN_TYPE": lambda self: self._parse_join_parts(), 810 } 811 812 STATEMENT_PARSERS = { 813 TokenType.ALTER: lambda self: self._parse_alter(), 814 TokenType.ANALYZE: lambda self: self._parse_analyze(), 815 TokenType.BEGIN: lambda self: self._parse_transaction(), 816 TokenType.CACHE: lambda self: self._parse_cache(), 817 TokenType.COMMENT: lambda self: self._parse_comment(), 818 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 819 TokenType.COPY: lambda self: self._parse_copy(), 820 TokenType.CREATE: 
lambda self: self._parse_create(), 821 TokenType.DELETE: lambda self: self._parse_delete(), 822 TokenType.DESC: lambda self: self._parse_describe(), 823 TokenType.DESCRIBE: lambda self: self._parse_describe(), 824 TokenType.DROP: lambda self: self._parse_drop(), 825 TokenType.GRANT: lambda self: self._parse_grant(), 826 TokenType.INSERT: lambda self: self._parse_insert(), 827 TokenType.KILL: lambda self: self._parse_kill(), 828 TokenType.LOAD: lambda self: self._parse_load(), 829 TokenType.MERGE: lambda self: self._parse_merge(), 830 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 831 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 832 TokenType.REFRESH: lambda self: self._parse_refresh(), 833 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 834 TokenType.SET: lambda self: self._parse_set(), 835 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 836 TokenType.UNCACHE: lambda self: self._parse_uncache(), 837 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 838 TokenType.UPDATE: lambda self: self._parse_update(), 839 TokenType.USE: lambda self: self._parse_use(), 840 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 841 } 842 843 UNARY_PARSERS = { 844 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 845 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 846 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 847 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 848 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 849 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 850 } 851 852 STRING_PARSERS = { 853 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 854 exp.RawString, this=token.text 855 ), 856 TokenType.NATIONAL_STRING: lambda 
self, token: self.expression( 857 exp.National, this=token.text 858 ), 859 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 860 TokenType.STRING: lambda self, token: self.expression( 861 exp.Literal, this=token.text, is_string=True 862 ), 863 TokenType.UNICODE_STRING: lambda self, token: self.expression( 864 exp.UnicodeString, 865 this=token.text, 866 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 867 ), 868 } 869 870 NUMERIC_PARSERS = { 871 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 872 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 873 TokenType.HEX_STRING: lambda self, token: self.expression( 874 exp.HexString, 875 this=token.text, 876 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 877 ), 878 TokenType.NUMBER: lambda self, token: self.expression( 879 exp.Literal, this=token.text, is_string=False 880 ), 881 } 882 883 PRIMARY_PARSERS = { 884 **STRING_PARSERS, 885 **NUMERIC_PARSERS, 886 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 887 TokenType.NULL: lambda self, _: self.expression(exp.Null), 888 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 889 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 890 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 891 TokenType.STAR: lambda self, _: self._parse_star_ops(), 892 } 893 894 PLACEHOLDER_PARSERS = { 895 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 896 TokenType.PARAMETER: lambda self: self._parse_parameter(), 897 TokenType.COLON: lambda self: ( 898 self.expression(exp.Placeholder, this=self._prev.text) 899 if self._match_set(self.ID_VAR_TOKENS) 900 else None 901 ), 902 } 903 904 RANGE_PARSERS = { 905 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 906 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 
907 TokenType.GLOB: binary_range_parser(exp.Glob), 908 TokenType.ILIKE: binary_range_parser(exp.ILike), 909 TokenType.IN: lambda self, this: self._parse_in(this), 910 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 911 TokenType.IS: lambda self, this: self._parse_is(this), 912 TokenType.LIKE: binary_range_parser(exp.Like), 913 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 914 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 915 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 916 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 917 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 918 } 919 920 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 921 "ALLOWED_VALUES": lambda self: self.expression( 922 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 923 ), 924 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 925 "AUTO": lambda self: self._parse_auto_property(), 926 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 927 "BACKUP": lambda self: self.expression( 928 exp.BackupProperty, this=self._parse_var(any_token=True) 929 ), 930 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 931 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 932 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 933 "CHECKSUM": lambda self: self._parse_checksum(), 934 "CLUSTER BY": lambda self: self._parse_cluster(), 935 "CLUSTERED": lambda self: self._parse_clustered_by(), 936 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 937 exp.CollateProperty, **kwargs 938 ), 939 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 940 "CONTAINS": lambda self: self._parse_contains_property(), 941 "COPY": lambda self: self._parse_copy_property(), 942 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 
943 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 944 "DEFINER": lambda self: self._parse_definer(), 945 "DETERMINISTIC": lambda self: self.expression( 946 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 947 ), 948 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 949 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 950 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 951 "DISTKEY": lambda self: self._parse_distkey(), 952 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 953 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 954 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 955 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 956 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 957 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 958 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 959 "FREESPACE": lambda self: self._parse_freespace(), 960 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 961 "HEAP": lambda self: self.expression(exp.HeapProperty), 962 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 963 "IMMUTABLE": lambda self: self.expression( 964 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 965 ), 966 "INHERITS": lambda self: self.expression( 967 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 968 ), 969 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 970 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 971 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 972 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 973 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 974 "LIKE": lambda self: self._parse_create_like(), 975 
"LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 976 "LOCK": lambda self: self._parse_locking(), 977 "LOCKING": lambda self: self._parse_locking(), 978 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 979 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 980 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 981 "MODIFIES": lambda self: self._parse_modifies_property(), 982 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 983 "NO": lambda self: self._parse_no_property(), 984 "ON": lambda self: self._parse_on_property(), 985 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 986 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 987 "PARTITION": lambda self: self._parse_partitioned_of(), 988 "PARTITION BY": lambda self: self._parse_partitioned_by(), 989 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 990 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 991 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 992 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 993 "READS": lambda self: self._parse_reads_property(), 994 "REMOTE": lambda self: self._parse_remote_with_connection(), 995 "RETURNS": lambda self: self._parse_returns(), 996 "STRICT": lambda self: self.expression(exp.StrictProperty), 997 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 998 "ROW": lambda self: self._parse_row(), 999 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1000 "SAMPLE": lambda self: self.expression( 1001 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1002 ), 1003 "SECURE": lambda self: self.expression(exp.SecureProperty), 1004 "SECURITY": lambda self: self._parse_security(), 1005 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1006 "SETTINGS": lambda self: 
self._parse_settings_property(), 1007 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1008 "SORTKEY": lambda self: self._parse_sortkey(), 1009 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1010 "STABLE": lambda self: self.expression( 1011 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1012 ), 1013 "STORED": lambda self: self._parse_stored(), 1014 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1015 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1016 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1017 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1018 "TO": lambda self: self._parse_to_table(), 1019 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1020 "TRANSFORM": lambda self: self.expression( 1021 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1022 ), 1023 "TTL": lambda self: self._parse_ttl(), 1024 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1025 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1026 "VOLATILE": lambda self: self._parse_volatile_property(), 1027 "WITH": lambda self: self._parse_with_property(), 1028 } 1029 1030 CONSTRAINT_PARSERS = { 1031 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1032 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1033 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1034 "CHARACTER SET": lambda self: self.expression( 1035 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1036 ), 1037 "CHECK": lambda self: self.expression( 1038 exp.CheckColumnConstraint, 1039 this=self._parse_wrapped(self._parse_assignment), 1040 enforced=self._match_text_seq("ENFORCED"), 1041 ), 1042 "COLLATE": lambda self: self.expression( 1043 exp.CollateColumnConstraint, 1044 this=self._parse_identifier() or self._parse_column(), 
1045 ), 1046 "COMMENT": lambda self: self.expression( 1047 exp.CommentColumnConstraint, this=self._parse_string() 1048 ), 1049 "COMPRESS": lambda self: self._parse_compress(), 1050 "CLUSTERED": lambda self: self.expression( 1051 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1052 ), 1053 "NONCLUSTERED": lambda self: self.expression( 1054 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1055 ), 1056 "DEFAULT": lambda self: self.expression( 1057 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1058 ), 1059 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1060 "EPHEMERAL": lambda self: self.expression( 1061 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1062 ), 1063 "EXCLUDE": lambda self: self.expression( 1064 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1065 ), 1066 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1067 "FORMAT": lambda self: self.expression( 1068 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1069 ), 1070 "GENERATED": lambda self: self._parse_generated_as_identity(), 1071 "IDENTITY": lambda self: self._parse_auto_increment(), 1072 "INLINE": lambda self: self._parse_inline(), 1073 "LIKE": lambda self: self._parse_create_like(), 1074 "NOT": lambda self: self._parse_not_constraint(), 1075 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1076 "ON": lambda self: ( 1077 self._match(TokenType.UPDATE) 1078 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1079 ) 1080 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1081 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1082 "PERIOD": lambda self: self._parse_period_for_system_time(), 1083 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1084 "REFERENCES": lambda self: self._parse_references(match=False), 1085 "TITLE": 
lambda self: self.expression( 1086 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1087 ), 1088 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1089 "UNIQUE": lambda self: self._parse_unique(), 1090 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1091 "WATERMARK": lambda self: self.expression( 1092 exp.WatermarkColumnConstraint, 1093 this=self._match(TokenType.FOR) and self._parse_column(), 1094 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1095 ), 1096 "WITH": lambda self: self.expression( 1097 exp.Properties, expressions=self._parse_wrapped_properties() 1098 ), 1099 } 1100 1101 ALTER_PARSERS = { 1102 "ADD": lambda self: self._parse_alter_table_add(), 1103 "AS": lambda self: self._parse_select(), 1104 "ALTER": lambda self: self._parse_alter_table_alter(), 1105 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1106 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1107 "DROP": lambda self: self._parse_alter_table_drop(), 1108 "RENAME": lambda self: self._parse_alter_table_rename(), 1109 "SET": lambda self: self._parse_alter_table_set(), 1110 "SWAP": lambda self: self.expression( 1111 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1112 ), 1113 } 1114 1115 ALTER_ALTER_PARSERS = { 1116 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1117 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1118 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1119 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1120 } 1121 1122 SCHEMA_UNNAMED_CONSTRAINTS = { 1123 "CHECK", 1124 "EXCLUDE", 1125 "FOREIGN KEY", 1126 "LIKE", 1127 "PERIOD", 1128 "PRIMARY KEY", 1129 "UNIQUE", 1130 "WATERMARK", 1131 } 1132 1133 NO_PAREN_FUNCTION_PARSERS = { 1134 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1135 "CASE": lambda self: self._parse_case(), 1136 
"CONNECT_BY_ROOT": lambda self: self.expression( 1137 exp.ConnectByRoot, this=self._parse_column() 1138 ), 1139 "IF": lambda self: self._parse_if(), 1140 } 1141 1142 INVALID_FUNC_NAME_TOKENS = { 1143 TokenType.IDENTIFIER, 1144 TokenType.STRING, 1145 } 1146 1147 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1148 1149 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1150 1151 FUNCTION_PARSERS = { 1152 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1153 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1154 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1155 "DECODE": lambda self: self._parse_decode(), 1156 "EXTRACT": lambda self: self._parse_extract(), 1157 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1158 "GAP_FILL": lambda self: self._parse_gap_fill(), 1159 "JSON_OBJECT": lambda self: self._parse_json_object(), 1160 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1161 "JSON_TABLE": lambda self: self._parse_json_table(), 1162 "MATCH": lambda self: self._parse_match_against(), 1163 "NORMALIZE": lambda self: self._parse_normalize(), 1164 "OPENJSON": lambda self: self._parse_open_json(), 1165 "OVERLAY": lambda self: self._parse_overlay(), 1166 "POSITION": lambda self: self._parse_position(), 1167 "PREDICT": lambda self: self._parse_predict(), 1168 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1169 "STRING_AGG": lambda self: self._parse_string_agg(), 1170 "SUBSTRING": lambda self: self._parse_substring(), 1171 "TRIM": lambda self: self._parse_trim(), 1172 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1173 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1174 "XMLELEMENT": lambda self: self.expression( 1175 exp.XMLElement, 1176 this=self._match_text_seq("NAME") and self._parse_id_var(), 1177 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1178 ), 1179 "XMLTABLE": lambda self: self._parse_xml_table(), 1180 } 
    # Maps a leading token of a query-modifier clause to a parser that returns a
    # (modifier_name, parsed_expression) pair; the name is the arg key set on the query node.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        # FETCH is parsed into the same "limit" slot as LIMIT
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        # FOR and LOCK both introduce locking clauses (FOR UPDATE / LOCK IN SHARE MODE)
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Parsers for the item following SET, keyed by the (uppercase) leading keyword
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

1217 1218 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1219 1220 TYPE_LITERAL_PARSERS = { 1221 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1222 } 1223 1224 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1225 1226 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1227 1228 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1229 1230 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1231 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1232 "ISOLATION": ( 1233 ("LEVEL", "REPEATABLE", "READ"), 1234 ("LEVEL", "READ", "COMMITTED"), 1235 ("LEVEL", "READ", "UNCOMITTED"), 1236 ("LEVEL", "SERIALIZABLE"), 1237 ), 1238 "READ": ("WRITE", "ONLY"), 1239 } 1240 1241 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1242 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1243 ) 1244 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1245 1246 CREATE_SEQUENCE: OPTIONS_TYPE = { 1247 "SCALE": ("EXTEND", "NOEXTEND"), 1248 "SHARD": ("EXTEND", "NOEXTEND"), 1249 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1250 **dict.fromkeys( 1251 ( 1252 "SESSION", 1253 "GLOBAL", 1254 "KEEP", 1255 "NOKEEP", 1256 "ORDER", 1257 "NOORDER", 1258 "NOCACHE", 1259 "CYCLE", 1260 "NOCYCLE", 1261 "NOMINVALUE", 1262 "NOMAXVALUE", 1263 "NOSCALE", 1264 "NOSHARD", 1265 ), 1266 tuple(), 1267 ), 1268 } 1269 1270 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1271 1272 USABLES: OPTIONS_TYPE = dict.fromkeys( 1273 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1274 ) 1275 1276 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1277 1278 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1279 "TYPE": ("EVOLUTION",), 1280 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1281 } 1282 1283 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1284 1285 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = 
dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1286 1287 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1288 "NOT": ("ENFORCED",), 1289 "MATCH": ( 1290 "FULL", 1291 "PARTIAL", 1292 "SIMPLE", 1293 ), 1294 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1295 "USING": ( 1296 "BTREE", 1297 "HASH", 1298 ), 1299 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1300 } 1301 1302 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1303 1304 CLONE_KEYWORDS = {"CLONE", "COPY"} 1305 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1306 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1307 1308 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1309 1310 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1311 1312 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1313 1314 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1315 1316 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1317 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1318 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1319 1320 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1321 1322 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1323 1324 ADD_CONSTRAINT_TOKENS = { 1325 TokenType.CONSTRAINT, 1326 TokenType.FOREIGN_KEY, 1327 TokenType.INDEX, 1328 TokenType.KEY, 1329 TokenType.PRIMARY_KEY, 1330 TokenType.UNIQUE, 1331 } 1332 1333 DISTINCT_TOKENS = {TokenType.DISTINCT} 1334 1335 NULL_TOKENS = {TokenType.NULL} 1336 1337 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1338 1339 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1340 1341 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1342 1343 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1344 1345 ODBC_DATETIME_LITERALS = { 1346 "d": exp.Date, 1347 "t": exp.Time, 1348 "ts": exp.Timestamp, 1349 } 1350 1351 
    # Keywords accepted by ON <condition> clauses in JSON functions (ON ERROR / ON EMPTY)
    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Parsers for the expression following ANALYZE, keyed by its leading keyword
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    # Whether CAST raises on failure (vs returning NULL, i.e. TRY_CAST semantics)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported here to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Resets the parser's mutable state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this error belongs to, then try the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and runs `parse_method` on each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Keep a semicolon that carries comments as its own chunk so the
                # comments aren't lost
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Either attach the explicitly provided comments, or fall back to any
        # comments collected from the previously consumed token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the pending comments (from the last consumed token) onto `expression`
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens (inclusive)
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True if there is no whitespace between the previous and current tokens
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward and refreshes the _curr/_next/_prev token views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back (or forward) to an absolute index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Falls back to wrapping the rest of the statement in an opaque Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Restore the cursor on failure (or unconditionally when retreat=True)
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Parses one TTL entry: an expression optionally followed by an action
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: a registered statement, a command, or a bare expression/query."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a Command for unknown object kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only if the full sequence matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; falls back to a Command when the syntax isn't recognized."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses the option list of a CREATE SEQUENCE statement."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
2044 seq.set("increment", self._parse_term()) 2045 elif self._match_text_seq("MINVALUE"): 2046 seq.set("minvalue", self._parse_term()) 2047 elif self._match_text_seq("MAXVALUE"): 2048 seq.set("maxvalue", self._parse_term()) 2049 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2050 self._match_text_seq("=") 2051 seq.set("start", self._parse_term()) 2052 elif self._match_text_seq("CACHE"): 2053 # T-SQL allows empty CACHE which is initialized dynamically 2054 seq.set("cache", self._parse_number() or True) 2055 elif self._match_text_seq("OWNED", "BY"): 2056 # "OWNED BY NONE" is the default 2057 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2058 else: 2059 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2060 if opt: 2061 options.append(opt) 2062 else: 2063 break 2064 2065 seq.set("options", options if options else None) 2066 return None if self._index == index else seq 2067 2068 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2069 # only used for teradata currently 2070 self._match(TokenType.COMMA) 2071 2072 kwargs = { 2073 "no": self._match_text_seq("NO"), 2074 "dual": self._match_text_seq("DUAL"), 2075 "before": self._match_text_seq("BEFORE"), 2076 "default": self._match_text_seq("DEFAULT"), 2077 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2078 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2079 "after": self._match_text_seq("AFTER"), 2080 "minimum": self._match_texts(("MIN", "MINIMUM")), 2081 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2082 } 2083 2084 if self._match_texts(self.PROPERTY_PARSERS): 2085 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2086 try: 2087 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2088 except TypeError: 2089 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2090 2091 return None 2092 2093 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2094 return 
self._parse_wrapped_csv(self._parse_property) 2095
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single DDL property, dispatching to PROPERTY_PARSERS when possible."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key=value property; rewind and try sequence options instead.
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse STORED BY <handler> or STORED AS <format> (incl. INPUTFORMAT/OUTPUTFORMAT)."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, normalizing an unquoted Identifier into a Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[=|AS] <value>` and wrap it in the given expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into an exp.Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield multiple properties.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE based on the token two positions back."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON/OFF [(...)]."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]."""
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...)/RANDOM [BUCKETS n|AUTO] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the many WITH-prefixed property forms, trying each in turn."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty: 2359 self._match(TokenType.EQ) 2360 2361 on = None 2362 if self._match(TokenType.ON): 2363 on = True 2364 elif self._match_text_seq("OFF"): 2365 on = False 2366 2367 return self.expression(exp.ChecksumProperty,
on=on, default=self._match(TokenType.DEFAULT)) 2368
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY clause; `wrapped` means the list is parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # COPY not followed by GRANTS: give back the COPY token.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING; rewind fully on a non-match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata-style LOCKING clause (kind, target, FOR/IN, lock type)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse Postgres partition bounds: IN (...), FROM (...) TO (...), or WITH (MODULUS, REMAINDER)."""
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) ->
t.Optional[exp.Expression]: 2620 if self._match_text_seq("PRIMARY", "INDEX"): 2621 return exp.NoPrimaryIndexProperty() 2622 if self._match_text_seq("SQL"): 2623 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2624 return None 2625
    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT PRESERVE/DELETE ROWS, else a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE (...), NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement targeting either a statement or a table."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" word was actually the first part of a dotted table name; rewind.
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse Oracle-style multitable INSERT (INSERT FIRST/ALL ... WHEN ... INTO ...)."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...] branch.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (including INSERT DIRECTORY and multitable forms)."""
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        # NOTE: the kwargs below are evaluated in order and each may consume
        # tokens, so their order mirrors the statement's clause order.
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2836 conflict = self._match_text_seq("ON", "CONFLICT") 2837 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2838 2839 if not conflict and not duplicate: 2840 return None 2841 2842 conflict_keys = None 2843 constraint = None 2844 2845 if conflict: 2846 if self._match_text_seq("ON", "CONSTRAINT"): 2847 constraint = self._parse_id_var() 2848 elif self._match(TokenType.L_PAREN): 2849 conflict_keys = self._parse_csv(self._parse_id_var) 2850 self._match_r_paren() 2851 2852 action =
self._parse_var_from_options(self.CONFLICT_ACTIONS) 2853 if self._prev.token_type == TokenType.UPDATE: 2854 self._match(TokenType.SET) 2855 expressions = self._parse_csv(self._parse_equality) 2856 else: 2857 expressions = None 2858 2859 return self.expression( 2860 exp.OnConflict, 2861 duplicate=duplicate, 2862 expressions=expressions, 2863 action=action, 2864 conflict_keys=conflict_keys, 2865 constraint=constraint, 2866 where=self._parse_where(), 2867 ) 2868 2869 def _parse_returning(self) -> t.Optional[exp.Returning]: 2870 if not self._match(TokenType.RETURNING): 2871 return None 2872 return self.expression( 2873 exp.Returning, 2874 expressions=self._parse_csv(self._parse_expression), 2875 into=self._match(TokenType.INTO) and self._parse_table_part(), 2876 ) 2877 2878 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2879 if not self._match(TokenType.FORMAT): 2880 return None 2881 return self._parse_row_format() 2882 2883 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2884 index = self._index 2885 with_ = with_ or self._match_text_seq("WITH") 2886 2887 if not self._match(TokenType.SERDE_PROPERTIES): 2888 self._retreat(index) 2889 return None 2890 return self.expression( 2891 exp.SerdeProperties, 2892 **{ # type: ignore 2893 "expressions": self._parse_wrapped_properties(), 2894 "with": with_, 2895 }, 2896 ) 2897 2898 def _parse_row_format( 2899 self, match_row: bool = False 2900 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2901 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2902 return None 2903 2904 if self._match_text_seq("SERDE"): 2905 this = self._parse_string() 2906 2907 serde_properties = self._parse_serde_properties() 2908 2909 return self.expression( 2910 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2911 ) 2912 2913 self._match_text_seq("DELIMITED") 2914 2915 kwargs = {} 2916 2917 
        # Each DELIMITED sub-clause is optional and order-sensitive; only the
        # ones present end up as kwargs on the property node.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ... INTO TABLE; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement into an exp.Delete node."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and 
self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement into an exp.Update node."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse USE [<kind>] <name> into an exp.Use node."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>; errors if TABLE is missing."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single 'key' = 'value' pair is parsed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION/SUBPARTITION (expr, ...) into an exp.Partition, or None."""
        if not 
self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one row of a VALUES clause into an exp.Tuple, or None."""

        def _parse_value_expression() -> t.Optional[exp.Expression]:
            # Some dialects allow the DEFAULT keyword in place of a value.
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (dialects may override this hook)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query (including WITH, VALUES, DESCRIBE, etc.).

        Returns None when no query-introducing token is found.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # NOTE: raise_error may not raise under a lenient ErrorLevel,
                # so a fallback value is still returned.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = 
self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery-style SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
                this = self._parse_simplified_pivot(
                    is_unpivot=self._prev.token_type == TokenType.UNPIVOT
                )
            elif self._match(TokenType.FROM):
                from_ = self._parse_from(skip_from_token=True)
                # Support parentheses for duckdb FROM-first syntax
                select = self._parse_select()
                if select:
                    select.set("from", from_)
                    this = select
                else:
                    this = exp.select("*").from_(t.cast(exp.From, from_))
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was not followed by a function call; give the token back.
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse SEARCH <kind> FIRST BY ... SET ... [USING ...] after a recursive WITH."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, 
skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            # CTEs are normally comma-separated, but a stray WITH is tolerated too.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse one CTE (alias AS (statement)); retreats and returns None when
        the alias is not followed by AS and the dialect requires it."""
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        # Normalize a bare VALUES CTE body into SELECT * FROM (VALUES ...) _values.
        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this 
section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            # Optional column alias list; retreat if the parens held nothing usable.
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery with optional pivots/alias/sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma/cross joins on names that reference earlier FROM sources
        into explicit UNNEST(...) calls (used by dialects with implicit unnesting)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = 
exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/LIMIT, ...) to a query."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an embedded OFFSET (and BY expressions);
                            # hoist them onto a dedicated Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and keep the raw SQL as a single hint string."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Hook for parsing a function call inside a hint (dialects may override)."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> 
t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        # If parsing failed or didn't consume everything, keep the hint verbatim.
        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint that the tokenizer attached as a comment."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; returns None when FROM is absent (unless skipped)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item, optionally prefixed with FINAL/RUNNING."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause into an exp.MatchRecognize node."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match mode is stored verbatim as an exp.var of the keywords seen.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like mini-language; capture its raw SQL by
            # scanning tokens until the parens balance out.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL ... or CROSS/OUTER APPLY ... into an exp.Lateral node."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply False means OUTER APPLY; None (below) means plain LATERAL.
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, if present."""
        return (
            self._match_set(self.JOIN_METHODS) 
and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the USING (col, ...) list, unwrapping columns to bare identifiers."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and APPLY), or None."""
        if self._match(TokenType.COMMA):
            # Comma join: only a Join node if a table actually follows.
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # The prefix tokens were not actually a join; rewind and discard them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = 
self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Handle nested joins: the ON/USING may belong to this join only after
            # a run of inner joins has been consumed; otherwise rewind.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of CREATE INDEX (USING, columns, INCLUDE, ...)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", 
"TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given or `anonymous` is set, the index name was handled by
        the caller and only ON [TABLE] <table> plus params are parsed here.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if 
self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single dotted-name component (function, identifier, string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly-qualified table name ([catalog.][db.]table) into exp.Table.

        Raises a parse error when the required table (or database, for a db
        reference) component is missing.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # Shift components: what was parsed as the table is really the database.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery, or a
        (possibly bracketed/qualified) table name with its trailing modifiers."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, 
advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Whether the TABLESAMPLE clause comes before or after the alias is
        # dialect-specific, hence the two ALIAS_POST_TABLESAMPLE branches below.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table versioning: FOR TIMESTAMP/VERSION AS OF, FROM..TO,
        BETWEEN..AND, CONTAINED IN (...), or ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            # Fall back to the AS OF form (the AS OF keywords are optional here)
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse a Snowflake AT/BEFORE time-travel clause, e.g. ``AT(TIMESTAMP => ...)``.

        Returns ``None`` (after restoring the token index) when the clause is
        absent or malformed.
        """
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a valid AT/BEFORE clause - undo all token consumption
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a ``CHANGES(INFORMATION => ...)`` clause, optionally followed by
        two historical-data specifiers (the start and end of the change interval)."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse ``UNNEST(expr, ...)`` with optional WITH ORDINALITY / WITH OFFSET.

        Args:
            with_alias: whether to also consume a trailing table alias.
        """
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In such dialects the alias names the produced column, not the table
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The surplus column alias names the ordinality/offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a ``VALUES (...)`` table constructor, optionally parenthesized and aliased."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE clause (or ``USING SAMPLE`` when ``as_modifier`` is set)."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        # Hive-style bucket sampling: BUCKET x OUT OF y [ON col]
        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
self._match_text_seq("OUT", "OF") 4093 bucket_denominator = bucket_denominator = self._parse_number() 4094 self._match(TokenType.ON) 4095 bucket_field = self._parse_field() 4096 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4097 percent = num 4098 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4099 size = num 4100 else: 4101 percent = num 4102 4103 if matched_l_paren: 4104 self._match_r_paren() 4105 4106 if self._match(TokenType.L_PAREN): 4107 method = self._parse_var(upper=True) 4108 seed = self._match(TokenType.COMMA) and self._parse_number() 4109 self._match_r_paren() 4110 elif self._match_texts(("SEED", "REPEATABLE")): 4111 seed = self._parse_wrapped(self._parse_number) 4112 4113 if not method and self.DEFAULT_SAMPLING_METHOD: 4114 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4115 4116 return self.expression( 4117 exp.TableSample, 4118 expressions=expressions, 4119 method=method, 4120 bucket_numerator=bucket_numerator, 4121 bucket_denominator=bucket_denominator, 4122 bucket_field=bucket_field, 4123 percent=percent, 4124 size=size, 4125 seed=seed, 4126 ) 4127 4128 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4129 return list(iter(self._parse_pivot, None)) or None 4130 4131 def _parse_joins(self) -> t.Iterator[exp.Join]: 4132 return iter(self._parse_join, None) 4133 4134 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4135 if not self._match(TokenType.INTO): 4136 return None 4137 4138 return self.expression( 4139 exp.UnpivotColumns, 4140 this=self._match_text_seq("NAME") and self._parse_column(), 4141 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4142 ) 4143 4144 # https://duckdb.org/docs/sql/statements/pivot 4145 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4146 def _parse_on() -> t.Optional[exp.Expression]: 4147 this = self._parse_bitwise() 4148 4149 if self._match(TokenType.IN): 4150 # PIVOT ... 
ON col IN (row_val1, row_val2) 4151 return self._parse_in(this) 4152 if self._match(TokenType.ALIAS, advance=False): 4153 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4154 return self._parse_alias(this) 4155 4156 return this 4157 4158 this = self._parse_table() 4159 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4160 into = self._parse_unpivot_columns() 4161 using = self._match(TokenType.USING) and self._parse_csv( 4162 lambda: self._parse_alias(self._parse_function()) 4163 ) 4164 group = self._parse_group() 4165 4166 return self.expression( 4167 exp.Pivot, 4168 this=this, 4169 expressions=expressions, 4170 using=using, 4171 group=group, 4172 unpivot=is_unpivot, 4173 into=into, 4174 ) 4175 4176 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4177 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4178 this = self._parse_select_or_expression() 4179 4180 self._match(TokenType.ALIAS) 4181 alias = self._parse_bitwise() 4182 if alias: 4183 if isinstance(alias, exp.Column) and not alias.db: 4184 alias = alias.this 4185 return self.expression(exp.PivotAlias, this=this, alias=alias) 4186 4187 return this 4188 4189 value = self._parse_column() 4190 4191 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4192 self.raise_error("Expecting IN (") 4193 4194 if self._match(TokenType.ANY): 4195 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4196 else: 4197 exprs = self._parse_csv(_parse_aliased_expression) 4198 4199 self._match_r_paren() 4200 return self.expression(exp.In, this=value, expressions=exprs) 4201 4202 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4203 index = self._index 4204 include_nulls = None 4205 4206 if self._match(TokenType.PIVOT): 4207 unpivot = False 4208 elif self._match(TokenType.UNPIVOT): 4209 unpivot = True 4210 4211 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4212 if self._match_text_seq("INCLUDE", "NULLS"): 4213 
include_nulls = True 4214 elif self._match_text_seq("EXCLUDE", "NULLS"): 4215 include_nulls = False 4216 else: 4217 return None 4218 4219 expressions = [] 4220 4221 if not self._match(TokenType.L_PAREN): 4222 self._retreat(index) 4223 return None 4224 4225 if unpivot: 4226 expressions = self._parse_csv(self._parse_column) 4227 else: 4228 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4229 4230 if not expressions: 4231 self.raise_error("Failed to parse PIVOT's aggregation list") 4232 4233 if not self._match(TokenType.FOR): 4234 self.raise_error("Expecting FOR") 4235 4236 field = self._parse_pivot_in() 4237 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4238 self._parse_bitwise 4239 ) 4240 4241 self._match_r_paren() 4242 4243 pivot = self.expression( 4244 exp.Pivot, 4245 expressions=expressions, 4246 field=field, 4247 unpivot=unpivot, 4248 include_nulls=include_nulls, 4249 default_on_null=default_on_null, 4250 ) 4251 4252 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4253 pivot.set("alias", self._parse_table_alias()) 4254 4255 if not unpivot: 4256 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4257 4258 columns: t.List[exp.Expression] = [] 4259 pivot_field_expressions = pivot.args["field"].expressions 4260 4261 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
4262 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4263 for fld in pivot_field_expressions: 4264 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4265 for name in names: 4266 if self.PREFIXED_PIVOT_COLUMNS: 4267 name = f"{name}_{field_name}" if name else field_name 4268 else: 4269 name = f"{field_name}_{name}" if name else field_name 4270 4271 columns.append(exp.to_identifier(name)) 4272 4273 pivot.set("columns", columns) 4274 4275 return pivot 4276 4277 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4278 return [agg.alias for agg in aggregations] 4279 4280 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4281 if not skip_where_token and not self._match(TokenType.PREWHERE): 4282 return None 4283 4284 return self.expression( 4285 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4286 ) 4287 4288 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4289 if not skip_where_token and not self._match(TokenType.WHERE): 4290 return None 4291 4292 return self.expression( 4293 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4294 ) 4295 4296 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4297 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4298 return None 4299 4300 elements: t.Dict[str, t.Any] = defaultdict(list) 4301 4302 if self._match(TokenType.ALL): 4303 elements["all"] = True 4304 elif self._match(TokenType.DISTINCT): 4305 elements["all"] = False 4306 4307 while True: 4308 index = self._index 4309 4310 elements["expressions"].extend( 4311 self._parse_csv( 4312 lambda: None 4313 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4314 else self._parse_assignment() 4315 ) 4316 ) 4317 4318 before_with_index = self._index 4319 with_prefix = self._match(TokenType.WITH) 4320 4321 if 
self._match(TokenType.ROLLUP): 4322 elements["rollup"].append( 4323 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4324 ) 4325 elif self._match(TokenType.CUBE): 4326 elements["cube"].append( 4327 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4328 ) 4329 elif self._match(TokenType.GROUPING_SETS): 4330 elements["grouping_sets"].append( 4331 self.expression( 4332 exp.GroupingSets, 4333 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4334 ) 4335 ) 4336 elif self._match_text_seq("TOTALS"): 4337 elements["totals"] = True # type: ignore 4338 4339 if before_with_index <= self._index <= before_with_index + 1: 4340 self._retreat(before_with_index) 4341 break 4342 4343 if index == self._index: 4344 break 4345 4346 return self.expression(exp.Group, **elements) # type: ignore 4347 4348 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4349 return self.expression( 4350 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4351 ) 4352 4353 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4354 if self._match(TokenType.L_PAREN): 4355 grouping_set = self._parse_csv(self._parse_column) 4356 self._match_r_paren() 4357 return self.expression(exp.Tuple, expressions=grouping_set) 4358 4359 return self._parse_column() 4360 4361 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4362 if not skip_having_token and not self._match(TokenType.HAVING): 4363 return None 4364 return self.expression(exp.Having, this=self._parse_assignment()) 4365 4366 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4367 if not self._match(TokenType.QUALIFY): 4368 return None 4369 return self.expression(exp.Qualify, this=self._parse_assignment()) 4370 4371 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4372 if skip_start_token: 4373 start = None 4374 elif self._match(TokenType.START_WITH): 4375 start = 
self._parse_assignment() 4376 else: 4377 return None 4378 4379 self._match(TokenType.CONNECT_BY) 4380 nocycle = self._match_text_seq("NOCYCLE") 4381 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4382 exp.Prior, this=self._parse_bitwise() 4383 ) 4384 connect = self._parse_assignment() 4385 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4386 4387 if not start and self._match(TokenType.START_WITH): 4388 start = self._parse_assignment() 4389 4390 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4391 4392 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4393 this = self._parse_id_var(any_token=True) 4394 if self._match(TokenType.ALIAS): 4395 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4396 return this 4397 4398 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4399 if self._match_text_seq("INTERPOLATE"): 4400 return self._parse_wrapped_csv(self._parse_name_as_expression) 4401 return None 4402 4403 def _parse_order( 4404 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4405 ) -> t.Optional[exp.Expression]: 4406 siblings = None 4407 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4408 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4409 return this 4410 4411 siblings = True 4412 4413 return self.expression( 4414 exp.Order, 4415 this=this, 4416 expressions=self._parse_csv(self._parse_ordered), 4417 siblings=siblings, 4418 ) 4419 4420 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4421 if not self._match(token): 4422 return None 4423 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4424 4425 def _parse_ordered( 4426 self, parse_method: t.Optional[t.Callable] = None 4427 ) -> t.Optional[exp.Ordered]: 4428 this = parse_method() if parse_method else self._parse_assignment() 4429 if not this: 4430 return None 4431 4432 if this.name.upper() 
== "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4433 this = exp.var("ALL") 4434 4435 asc = self._match(TokenType.ASC) 4436 desc = self._match(TokenType.DESC) or (asc and False) 4437 4438 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4439 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4440 4441 nulls_first = is_nulls_first or False 4442 explicitly_null_ordered = is_nulls_first or is_nulls_last 4443 4444 if ( 4445 not explicitly_null_ordered 4446 and ( 4447 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4448 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4449 ) 4450 and self.dialect.NULL_ORDERING != "nulls_are_last" 4451 ): 4452 nulls_first = True 4453 4454 if self._match_text_seq("WITH", "FILL"): 4455 with_fill = self.expression( 4456 exp.WithFill, 4457 **{ # type: ignore 4458 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4459 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4460 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4461 "interpolate": self._parse_interpolate(), 4462 }, 4463 ) 4464 else: 4465 with_fill = None 4466 4467 return self.expression( 4468 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4469 ) 4470 4471 def _parse_limit_options(self) -> exp.LimitOptions: 4472 percent = self._match(TokenType.PERCENT) 4473 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4474 self._match_text_seq("ONLY") 4475 with_ties = self._match_text_seq("WITH", "TIES") 4476 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4477 4478 def _parse_limit( 4479 self, 4480 this: t.Optional[exp.Expression] = None, 4481 top: bool = False, 4482 skip_limit_token: bool = False, 4483 ) -> t.Optional[exp.Expression]: 4484 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4485 comments = self._prev_comments 4486 if top: 4487 limit_paren = self._match(TokenType.L_PAREN) 4488 expression = 
self._parse_term() if limit_paren else self._parse_number() 4489 4490 if limit_paren: 4491 self._match_r_paren() 4492 4493 limit_options = self._parse_limit_options() 4494 else: 4495 limit_options = None 4496 expression = self._parse_term() 4497 4498 if self._match(TokenType.COMMA): 4499 offset = expression 4500 expression = self._parse_term() 4501 else: 4502 offset = None 4503 4504 limit_exp = self.expression( 4505 exp.Limit, 4506 this=this, 4507 expression=expression, 4508 offset=offset, 4509 comments=comments, 4510 limit_options=limit_options, 4511 expressions=self._parse_limit_by(), 4512 ) 4513 4514 return limit_exp 4515 4516 if self._match(TokenType.FETCH): 4517 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4518 direction = self._prev.text.upper() if direction else "FIRST" 4519 4520 count = self._parse_field(tokens=self.FETCH_TOKENS) 4521 4522 return self.expression( 4523 exp.Fetch, 4524 direction=direction, 4525 count=count, 4526 limit_options=self._parse_limit_options(), 4527 ) 4528 4529 return this 4530 4531 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4532 if not self._match(TokenType.OFFSET): 4533 return this 4534 4535 count = self._parse_term() 4536 self._match_set((TokenType.ROW, TokenType.ROWS)) 4537 4538 return self.expression( 4539 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4540 ) 4541 4542 def _can_parse_limit_or_offset(self) -> bool: 4543 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4544 return False 4545 4546 index = self._index 4547 result = bool( 4548 self._try_parse(self._parse_limit, retreat=True) 4549 or self._try_parse(self._parse_offset, retreat=True) 4550 ) 4551 self._retreat(index) 4552 return result 4553 4554 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4555 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4556 4557 def _parse_locks(self) -> t.List[exp.Lock]: 4558 locks 
= [] 4559 while True: 4560 if self._match_text_seq("FOR", "UPDATE"): 4561 update = True 4562 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4563 "LOCK", "IN", "SHARE", "MODE" 4564 ): 4565 update = False 4566 else: 4567 break 4568 4569 expressions = None 4570 if self._match_text_seq("OF"): 4571 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4572 4573 wait: t.Optional[bool | exp.Expression] = None 4574 if self._match_text_seq("NOWAIT"): 4575 wait = True 4576 elif self._match_text_seq("WAIT"): 4577 wait = self._parse_primary() 4578 elif self._match_text_seq("SKIP", "LOCKED"): 4579 wait = False 4580 4581 locks.append( 4582 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4583 ) 4584 4585 return locks 4586 4587 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4588 while this and self._match_set(self.SET_OPERATIONS): 4589 token_type = self._prev.token_type 4590 4591 if token_type == TokenType.UNION: 4592 operation: t.Type[exp.SetOperation] = exp.Union 4593 elif token_type == TokenType.EXCEPT: 4594 operation = exp.Except 4595 else: 4596 operation = exp.Intersect 4597 4598 comments = self._prev.comments 4599 4600 if self._match(TokenType.DISTINCT): 4601 distinct: t.Optional[bool] = True 4602 elif self._match(TokenType.ALL): 4603 distinct = False 4604 else: 4605 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4606 if distinct is None: 4607 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4608 4609 by_name = self._match_text_seq("BY", "NAME") 4610 expression = self._parse_select(nested=True, parse_set_operation=False) 4611 4612 this = self.expression( 4613 operation, 4614 comments=comments, 4615 this=this, 4616 distinct=distinct, 4617 by_name=by_name, 4618 expression=expression, 4619 ) 4620 4621 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4622 expression = this.expression 4623 4624 if 
expression: 4625 for arg in self.SET_OP_MODIFIERS: 4626 expr = expression.args.get(arg) 4627 if expr: 4628 this.set(arg, expr.pop()) 4629 4630 return this 4631 4632 def _parse_expression(self) -> t.Optional[exp.Expression]: 4633 return self._parse_alias(self._parse_assignment()) 4634 4635 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4636 this = self._parse_disjunction() 4637 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4638 # This allows us to parse <non-identifier token> := <expr> 4639 this = exp.column( 4640 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4641 ) 4642 4643 while self._match_set(self.ASSIGNMENT): 4644 if isinstance(this, exp.Column) and len(this.parts) == 1: 4645 this = this.this 4646 4647 this = self.expression( 4648 self.ASSIGNMENT[self._prev.token_type], 4649 this=this, 4650 comments=self._prev_comments, 4651 expression=self._parse_assignment(), 4652 ) 4653 4654 return this 4655 4656 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4657 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4658 4659 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4660 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4661 4662 def _parse_equality(self) -> t.Optional[exp.Expression]: 4663 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4664 4665 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4666 return self._parse_tokens(self._parse_range, self.COMPARISON) 4667 4668 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4669 this = this or self._parse_bitwise() 4670 negate = self._match(TokenType.NOT) 4671 4672 if self._match_set(self.RANGE_PARSERS): 4673 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4674 if not expression: 4675 return this 4676 4677 this = expression 4678 elif self._match(TokenType.ISNULL): 4679 this = self.expression(exp.Is, this=this, 
expression=exp.Null()) 4680 4681 # Postgres supports ISNULL and NOTNULL for conditions. 4682 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4683 if self._match(TokenType.NOTNULL): 4684 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4685 this = self.expression(exp.Not, this=this) 4686 4687 if negate: 4688 this = self._negate_range(this) 4689 4690 if self._match(TokenType.IS): 4691 this = self._parse_is(this) 4692 4693 return this 4694 4695 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4696 if not this: 4697 return this 4698 4699 return self.expression(exp.Not, this=this) 4700 4701 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4702 index = self._index - 1 4703 negate = self._match(TokenType.NOT) 4704 4705 if self._match_text_seq("DISTINCT", "FROM"): 4706 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4707 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4708 4709 if self._match(TokenType.JSON): 4710 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4711 4712 if self._match_text_seq("WITH"): 4713 _with = True 4714 elif self._match_text_seq("WITHOUT"): 4715 _with = False 4716 else: 4717 _with = None 4718 4719 unique = self._match(TokenType.UNIQUE) 4720 self._match_text_seq("KEYS") 4721 expression: t.Optional[exp.Expression] = self.expression( 4722 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4723 ) 4724 else: 4725 expression = self._parse_primary() or self._parse_null() 4726 if not expression: 4727 self._retreat(index) 4728 return None 4729 4730 this = self.expression(exp.Is, this=this, expression=expression) 4731 return self.expression(exp.Not, this=this) if negate else this 4732 4733 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4734 unnest = self._parse_unnest(with_alias=False) 4735 if unnest: 4736 this = self.expression(exp.In, 
this=this, unnest=unnest) 4737 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4738 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4739 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4740 4741 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4742 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4743 else: 4744 this = self.expression(exp.In, this=this, expressions=expressions) 4745 4746 if matched_l_paren: 4747 self._match_r_paren(this) 4748 elif not self._match(TokenType.R_BRACKET, expression=this): 4749 self.raise_error("Expecting ]") 4750 else: 4751 this = self.expression(exp.In, this=this, field=self._parse_column()) 4752 4753 return this 4754 4755 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4756 low = self._parse_bitwise() 4757 self._match(TokenType.AND) 4758 high = self._parse_bitwise() 4759 return self.expression(exp.Between, this=this, low=low, high=high) 4760 4761 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4762 if not self._match(TokenType.ESCAPE): 4763 return this 4764 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4765 4766 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4767 index = self._index 4768 4769 if not self._match(TokenType.INTERVAL) and match_interval: 4770 return None 4771 4772 if self._match(TokenType.STRING, advance=False): 4773 this = self._parse_primary() 4774 else: 4775 this = self._parse_term() 4776 4777 if not this or ( 4778 isinstance(this, exp.Column) 4779 and not this.table 4780 and not this.this.quoted 4781 and this.name.upper() == "IS" 4782 ): 4783 self._retreat(index) 4784 return None 4785 4786 unit = self._parse_function() or ( 4787 not self._match(TokenType.ALIAS, advance=False) 4788 and self._parse_var(any_token=True, upper=True) 4789 ) 4790 4791 # Most 
dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4792 # each INTERVAL expression into this canonical form so it's easy to transpile 4793 if this and this.is_number: 4794 this = exp.Literal.string(this.to_py()) 4795 elif this and this.is_string: 4796 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4797 if parts and unit: 4798 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4799 unit = None 4800 self._retreat(self._index - 1) 4801 4802 if len(parts) == 1: 4803 this = exp.Literal.string(parts[0][0]) 4804 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4805 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4806 unit = self.expression( 4807 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4808 ) 4809 4810 interval = self.expression(exp.Interval, this=this, unit=unit) 4811 4812 index = self._index 4813 self._match(TokenType.PLUS) 4814 4815 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4816 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4817 return self.expression( 4818 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4819 ) 4820 4821 self._retreat(index) 4822 return interval 4823 4824 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4825 this = self._parse_term() 4826 4827 while True: 4828 if self._match_set(self.BITWISE): 4829 this = self.expression( 4830 self.BITWISE[self._prev.token_type], 4831 this=this, 4832 expression=self._parse_term(), 4833 ) 4834 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4835 this = self.expression( 4836 exp.DPipe, 4837 this=this, 4838 expression=self._parse_term(), 4839 safe=not self.dialect.STRICT_STRING_CONCAT, 4840 ) 4841 elif self._match(TokenType.DQMARK): 4842 this = self.expression( 4843 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4844 ) 4845 elif self._match_pair(TokenType.LT, 
                TokenType.LT):  # NOTE(review): fragment — `_parse_bitwise` begins above this excerpt
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse term-level binary operators (the token -> class map in `self.TERM`)."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse factor-level binary operators (`self.FACTOR`), e.g. multiplication/division."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word token (e.g. DIV) with no right-hand operand was not an operator
            # after all, so back the cursor up one token and stop.
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators (`self.EXPONENT`) over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary-prefixed expression, falling back to AT TIME ZONE over a type/column."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, a cast literal (e.g. DATE 'x'), or fall back to a column.

        Returns None if nothing could be parsed; the token cursor is restored on failure.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01': delegate to a type-specific builder
                # when one is registered, otherwise canonicalize to CAST(literal AS type).
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one size/precision argument of a parameterized type, e.g. the 38 in DECIMAL(38)."""
        this = self._parse_type()
        if not this:
            return None

        # Bare column names inside type parens (e.g. VARCHAR(MAX)) are keywords, not columns
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into an exp.DataType.

        Args:
            check_func: if the type could also be a function call (maybe_func), peek ahead
                for a string literal and bail out unless one follows.
            schema: whether we're parsing inside a schema definition (affects fixed-size
                array handling below).
            allow_identifiers: allow a plain identifier to be re-tokenized as a type
                keyword or treated as a user-defined type.

        Returns None (with the cursor restored) when no type can be parsed.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier: it may spell a type keyword (e.g. quoted "INT")
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # NULLABLE(T) is unwrapped to T with the nullable flag set
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # Disambiguate type-with-args vs. function call by peeking for a string literal
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT type definition, e.g. `a INT` in STRUCT<a INT>."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # We only parsed a field name but the struct requires a type: retry as a bare type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer-join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference (an Identifier promoted to a Column)."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by "(" is a plain identifier here, not the VALUES clause
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks-style `col:path` VARIANT extraction into exp.JSONExtract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # NOTE(review): the genexp variable `t` shadows the module-level `typing as t`
                # import inside this expression only; harmless but easy to misread.
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type operand of a `::` cast."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, ::-casts, brackets, …) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Fold the dotted part into the Column's table/db/catalog slots
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, adjacent-string concat, or parenthesized expr."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT('a', 'b'))
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. ".5" -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/var fallback."""
        if anonymous_func:
            # Try the function first so e.g. a keyword-named call isn't consumed as a primary
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC-style `{fn <function>}` wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation into a known builder result or exp.Anonymous.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: force an exp.Anonymous node even for known function names.
            optional_parens: allow functions callable without parentheses.
            any_token: accept any non-reserved token as a function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Builders that declare a `dialect` parameter get the active dialect passed in
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to coerce a positional function arg; identity by default."""
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style function args (aliases/EQs) into exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Strip the Column wrapper so the key is a bare identifier
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        """Parse the body expression of a user-defined function."""
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single UDF parameter as a column definition."""
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: qualified name plus an optional parameter list."""
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); fall back to a bare identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified (kind.name)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter name."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression, or fall back to a select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse an ordinary argument expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema around `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field as a column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # ClickHouse-style computed column: <name> ALIAS/MATERIALIZED <expr>
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; retreat if REFRESH doesn't follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with one or a parenthesized list of values."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | (<expr>)} constraints."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Shorthand numeric form: IDENTITY(<start>, <increment>)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <value> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # A WITH followed by a known procedure option is not a constraint (T-SQL)
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named CONSTRAINT, or fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Parse consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint using the registered constraint parsers."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the identifier naming a unique key."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with optional NULLS/USING/ON CONFLICT parts."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options as strings, e.g. 'ON DELETE CASCADE'."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; if `match` is False the keyword is assumed consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint with its REFERENCES and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a primary key column list (ordered column or field)."""
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); retreat if the snapshot token is absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY: a column-level constraint or a table-level key with a column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse a bracketed key/value entry: a sliceable, optionally aliased assignment."""
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    # NOTE(review): fragment — this method continues beyond the excerpt
    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
6033 Parses a datetime column in ODBC format. We parse the column into the corresponding 6034 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6035 same as we did for `DATE('yyyy-mm-dd')`. 6036 6037 Reference: 6038 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6039 """ 6040 self._match(TokenType.VAR) 6041 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6042 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6043 if not self._match(TokenType.R_BRACE): 6044 self.raise_error("Expected }") 6045 return expression 6046 6047 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6048 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6049 return this 6050 6051 bracket_kind = self._prev.token_type 6052 if ( 6053 bracket_kind == TokenType.L_BRACE 6054 and self._curr 6055 and self._curr.token_type == TokenType.VAR 6056 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6057 ): 6058 return self._parse_odbc_datetime_literal() 6059 6060 expressions = self._parse_csv( 6061 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6062 ) 6063 6064 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6065 self.raise_error("Expected ]") 6066 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6067 self.raise_error("Expected }") 6068 6069 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6070 if bracket_kind == TokenType.L_BRACE: 6071 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6072 elif not this: 6073 this = build_array_constructor( 6074 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6075 ) 6076 else: 6077 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6078 if constructor_type: 6079 return 
build_array_constructor( 6080 constructor_type, 6081 args=expressions, 6082 bracket_kind=bracket_kind, 6083 dialect=self.dialect, 6084 ) 6085 6086 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6087 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6088 6089 self._add_comments(this) 6090 return self._parse_bracket(this) 6091 6092 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6093 if self._match(TokenType.COLON): 6094 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6095 return this 6096 6097 def _parse_case(self) -> t.Optional[exp.Expression]: 6098 ifs = [] 6099 default = None 6100 6101 comments = self._prev_comments 6102 expression = self._parse_assignment() 6103 6104 while self._match(TokenType.WHEN): 6105 this = self._parse_assignment() 6106 self._match(TokenType.THEN) 6107 then = self._parse_assignment() 6108 ifs.append(self.expression(exp.If, this=this, true=then)) 6109 6110 if self._match(TokenType.ELSE): 6111 default = self._parse_assignment() 6112 6113 if not self._match(TokenType.END): 6114 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6115 default = exp.column("interval") 6116 else: 6117 self.raise_error("Expected END after CASE", self._prev) 6118 6119 return self.expression( 6120 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6121 ) 6122 6123 def _parse_if(self) -> t.Optional[exp.Expression]: 6124 if self._match(TokenType.L_PAREN): 6125 args = self._parse_csv(self._parse_assignment) 6126 this = self.validate_expression(exp.If.from_arg_list(args), args) 6127 self._match_r_paren() 6128 else: 6129 index = self._index - 1 6130 6131 if self.NO_PAREN_IF_COMMANDS and index == 0: 6132 return self._parse_as_command(self._prev) 6133 6134 condition = self._parse_assignment() 6135 6136 if not condition: 6137 self._retreat(index) 6138 return None 6139 6140 self._match(TokenType.THEN) 
6141 true = self._parse_assignment() 6142 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6143 self._match(TokenType.END) 6144 this = self.expression(exp.If, this=condition, true=true, false=false) 6145 6146 return this 6147 6148 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6149 if not self._match_text_seq("VALUE", "FOR"): 6150 self._retreat(self._index - 1) 6151 return None 6152 6153 return self.expression( 6154 exp.NextValueFor, 6155 this=self._parse_column(), 6156 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6157 ) 6158 6159 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6160 this = self._parse_function() or self._parse_var_or_string(upper=True) 6161 6162 if self._match(TokenType.FROM): 6163 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6164 6165 if not self._match(TokenType.COMMA): 6166 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6167 6168 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6169 6170 def _parse_gap_fill(self) -> exp.GapFill: 6171 self._match(TokenType.TABLE) 6172 this = self._parse_table() 6173 6174 self._match(TokenType.COMMA) 6175 args = [this, *self._parse_csv(self._parse_lambda)] 6176 6177 gap_fill = exp.GapFill.from_arg_list(args) 6178 return self.validate_expression(gap_fill, args) 6179 6180 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6181 this = self._parse_assignment() 6182 6183 if not self._match(TokenType.ALIAS): 6184 if self._match(TokenType.COMMA): 6185 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6186 6187 self.raise_error("Expected AS after CAST") 6188 6189 fmt = None 6190 to = self._parse_types() 6191 6192 default = self._match(TokenType.DEFAULT) 6193 if default: 6194 default = self._parse_bitwise() 6195 self._match_text_seq("ON", "CONVERSION", "ERROR") 6196 6197 if 
self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6198 fmt_string = self._parse_string() 6199 fmt = self._parse_at_time_zone(fmt_string) 6200 6201 if not to: 6202 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6203 if to.this in exp.DataType.TEMPORAL_TYPES: 6204 this = self.expression( 6205 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6206 this=this, 6207 format=exp.Literal.string( 6208 format_time( 6209 fmt_string.this if fmt_string else "", 6210 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6211 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6212 ) 6213 ), 6214 safe=safe, 6215 ) 6216 6217 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6218 this.set("zone", fmt.args["zone"]) 6219 return this 6220 elif not to: 6221 self.raise_error("Expected TYPE after CAST") 6222 elif isinstance(to, exp.Identifier): 6223 to = exp.DataType.build(to.name, udt=True) 6224 elif to.this == exp.DataType.Type.CHAR: 6225 if self._match(TokenType.CHARACTER_SET): 6226 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6227 6228 return self.expression( 6229 exp.Cast if strict else exp.TryCast, 6230 this=this, 6231 to=to, 6232 format=fmt, 6233 safe=safe, 6234 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6235 default=default, 6236 ) 6237 6238 def _parse_string_agg(self) -> exp.GroupConcat: 6239 if self._match(TokenType.DISTINCT): 6240 args: t.List[t.Optional[exp.Expression]] = [ 6241 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6242 ] 6243 if self._match(TokenType.COMMA): 6244 args.extend(self._parse_csv(self._parse_assignment)) 6245 else: 6246 args = self._parse_csv(self._parse_assignment) # type: ignore 6247 6248 if self._match_text_seq("ON", "OVERFLOW"): 6249 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6250 if self._match_text_seq("ERROR"): 6251 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 
6252 else: 6253 self._match_text_seq("TRUNCATE") 6254 on_overflow = self.expression( 6255 exp.OverflowTruncateBehavior, 6256 this=self._parse_string(), 6257 with_count=( 6258 self._match_text_seq("WITH", "COUNT") 6259 or not self._match_text_seq("WITHOUT", "COUNT") 6260 ), 6261 ) 6262 else: 6263 on_overflow = None 6264 6265 index = self._index 6266 if not self._match(TokenType.R_PAREN) and args: 6267 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6268 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6269 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6270 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6271 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6272 6273 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6274 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6275 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6276 if not self._match_text_seq("WITHIN", "GROUP"): 6277 self._retreat(index) 6278 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6279 6280 # The corresponding match_r_paren will be called in parse_function (caller) 6281 self._match_l_paren() 6282 6283 return self.expression( 6284 exp.GroupConcat, 6285 this=self._parse_order(this=seq_get(args, 0)), 6286 separator=seq_get(args, 1), 6287 on_overflow=on_overflow, 6288 ) 6289 6290 def _parse_convert( 6291 self, strict: bool, safe: t.Optional[bool] = None 6292 ) -> t.Optional[exp.Expression]: 6293 this = self._parse_bitwise() 6294 6295 if self._match(TokenType.USING): 6296 to: t.Optional[exp.Expression] = self.expression( 6297 exp.CharacterSet, this=self._parse_var() 6298 ) 6299 elif self._match(TokenType.COMMA): 6300 to = self._parse_types() 6301 else: 6302 to = None 6303 6304 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6305 6306 def _parse_xml_table(self) -> exp.XMLTable: 6307 namespaces = None 6308 passing = None 6309 columns = None 6310 6311 if self._match_text_seq("XMLNAMESPACES", "("): 6312 namespaces = self._parse_xml_namespace() 6313 self._match_text_seq(")", ",") 6314 6315 this = self._parse_string() 6316 6317 if self._match_text_seq("PASSING"): 6318 # The BY VALUE keywords are optional and are provided for semantic clarity 6319 self._match_text_seq("BY", "VALUE") 6320 passing = self._parse_csv(self._parse_column) 6321 6322 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6323 6324 if self._match_text_seq("COLUMNS"): 6325 columns = self._parse_csv(self._parse_field_def) 6326 6327 return self.expression( 6328 exp.XMLTable, 6329 this=this, 6330 namespaces=namespaces, 6331 passing=passing, 6332 columns=columns, 6333 by_ref=by_ref, 6334 ) 6335 6336 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6337 namespaces = [] 6338 6339 while True: 6340 if self._match(TokenType.DEFAULT): 6341 uri = self._parse_string() 
6342 else: 6343 uri = self._parse_alias(self._parse_string()) 6344 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6345 if not self._match(TokenType.COMMA): 6346 break 6347 6348 return namespaces 6349 6350 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6351 """ 6352 There are generally two variants of the DECODE function: 6353 6354 - DECODE(bin, charset) 6355 - DECODE(expression, search, result [, search, result] ... [, default]) 6356 6357 The second variant will always be parsed into a CASE expression. Note that NULL 6358 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6359 instead of relying on pattern matching. 6360 """ 6361 args = self._parse_csv(self._parse_assignment) 6362 6363 if len(args) < 3: 6364 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6365 6366 expression, *expressions = args 6367 if not expression: 6368 return None 6369 6370 ifs = [] 6371 for search, result in zip(expressions[::2], expressions[1::2]): 6372 if not search or not result: 6373 return None 6374 6375 if isinstance(search, exp.Literal): 6376 ifs.append( 6377 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6378 ) 6379 elif isinstance(search, exp.Null): 6380 ifs.append( 6381 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6382 ) 6383 else: 6384 cond = exp.or_( 6385 exp.EQ(this=expression.copy(), expression=search), 6386 exp.and_( 6387 exp.Is(this=expression.copy(), expression=exp.Null()), 6388 exp.Is(this=search.copy(), expression=exp.Null()), 6389 copy=False, 6390 ), 6391 copy=False, 6392 ) 6393 ifs.append(exp.If(this=cond, true=result)) 6394 6395 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6396 6397 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6398 self._match_text_seq("KEY") 6399 key = self._parse_column() 6400 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 
6401 self._match_text_seq("VALUE") 6402 value = self._parse_bitwise() 6403 6404 if not key and not value: 6405 return None 6406 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6407 6408 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6409 if not this or not self._match_text_seq("FORMAT", "JSON"): 6410 return this 6411 6412 return self.expression(exp.FormatJson, this=this) 6413 6414 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6415 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6416 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6417 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6418 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6419 else: 6420 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6421 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6422 6423 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6424 6425 if not empty and not error and not null: 6426 return None 6427 6428 return self.expression( 6429 exp.OnCondition, 6430 empty=empty, 6431 error=error, 6432 null=null, 6433 ) 6434 6435 def _parse_on_handling( 6436 self, on: str, *values: str 6437 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6438 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6439 for value in values: 6440 if self._match_text_seq(value, "ON", on): 6441 return f"{value} ON {on}" 6442 6443 index = self._index 6444 if self._match(TokenType.DEFAULT): 6445 default_value = self._parse_bitwise() 6446 if self._match_text_seq("ON", on): 6447 return default_value 6448 6449 self._retreat(index) 6450 6451 return None 6452 6453 @t.overload 6454 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6455 6456 @t.overload 6457 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6458 6459 def _parse_json_object(self, agg=False): 6460 star = self._parse_star() 6461 expressions = ( 6462 [star] 6463 if star 6464 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6465 ) 6466 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6467 6468 unique_keys = None 6469 if self._match_text_seq("WITH", "UNIQUE"): 6470 unique_keys = True 6471 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6472 unique_keys = False 6473 6474 self._match_text_seq("KEYS") 6475 6476 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6477 self._parse_type() 6478 ) 6479 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6480 6481 return self.expression( 6482 exp.JSONObjectAgg if agg else exp.JSONObject, 6483 expressions=expressions, 6484 null_handling=null_handling, 6485 unique_keys=unique_keys, 6486 return_type=return_type, 6487 encoding=encoding, 6488 ) 6489 6490 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6491 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6492 if not self._match_text_seq("NESTED"): 6493 this = self._parse_id_var() 6494 kind = self._parse_types(allow_identifiers=False) 6495 nested = None 6496 else: 6497 this = None 6498 kind = None 6499 nested = True 6500 6501 path = self._match_text_seq("PATH") and self._parse_string() 6502 nested_schema = nested and self._parse_json_schema() 6503 6504 return self.expression( 6505 exp.JSONColumnDef, 6506 this=this, 6507 kind=kind, 6508 path=path, 6509 nested_schema=nested_schema, 6510 ) 6511 6512 def _parse_json_schema(self) -> exp.JSONSchema: 6513 self._match_text_seq("COLUMNS") 6514 return self.expression( 6515 exp.JSONSchema, 6516 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6517 ) 6518 6519 def _parse_json_table(self) -> exp.JSONTable: 6520 this = self._parse_format_json(self._parse_bitwise()) 6521 path = self._match(TokenType.COMMA) and 
self._parse_string() 6522 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6523 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6524 schema = self._parse_json_schema() 6525 6526 return exp.JSONTable( 6527 this=this, 6528 schema=schema, 6529 path=path, 6530 error_handling=error_handling, 6531 empty_handling=empty_handling, 6532 ) 6533 6534 def _parse_match_against(self) -> exp.MatchAgainst: 6535 expressions = self._parse_csv(self._parse_column) 6536 6537 self._match_text_seq(")", "AGAINST", "(") 6538 6539 this = self._parse_string() 6540 6541 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6542 modifier = "IN NATURAL LANGUAGE MODE" 6543 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6544 modifier = f"{modifier} WITH QUERY EXPANSION" 6545 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6546 modifier = "IN BOOLEAN MODE" 6547 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6548 modifier = "WITH QUERY EXPANSION" 6549 else: 6550 modifier = None 6551 6552 return self.expression( 6553 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6554 ) 6555 6556 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6557 def _parse_open_json(self) -> exp.OpenJSON: 6558 this = self._parse_bitwise() 6559 path = self._match(TokenType.COMMA) and self._parse_string() 6560 6561 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6562 this = self._parse_field(any_token=True) 6563 kind = self._parse_types() 6564 path = self._parse_string() 6565 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6566 6567 return self.expression( 6568 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6569 ) 6570 6571 expressions = None 6572 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6573 self._match_l_paren() 6574 expressions = self._parse_csv(_parse_open_json_column_def) 6575 6576 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions) 6577 6578 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6579 args = self._parse_csv(self._parse_bitwise) 6580 6581 if self._match(TokenType.IN): 6582 return self.expression( 6583 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6584 ) 6585 6586 if haystack_first: 6587 haystack = seq_get(args, 0) 6588 needle = seq_get(args, 1) 6589 else: 6590 haystack = seq_get(args, 1) 6591 needle = seq_get(args, 0) 6592 6593 return self.expression( 6594 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6595 ) 6596 6597 def _parse_predict(self) -> exp.Predict: 6598 self._match_text_seq("MODEL") 6599 this = self._parse_table() 6600 6601 self._match(TokenType.COMMA) 6602 self._match_text_seq("TABLE") 6603 6604 return self.expression( 6605 exp.Predict, 6606 this=this, 6607 expression=self._parse_table(), 6608 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6609 ) 6610 6611 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6612 args = self._parse_csv(self._parse_table) 6613 return exp.JoinHint(this=func_name.upper(), expressions=args) 6614 6615 def _parse_substring(self) -> exp.Substring: 6616 # Postgres supports the form: substring(string [from int] [for int]) 6617 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6618 6619 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6620 6621 if self._match(TokenType.FROM): 6622 args.append(self._parse_bitwise()) 6623 if self._match(TokenType.FOR): 6624 if len(args) == 1: 6625 args.append(exp.Literal.number(1)) 6626 args.append(self._parse_bitwise()) 6627 6628 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6629 6630 def _parse_trim(self) -> exp.Trim: 6631 # https://www.w3resource.com/sql/character-functions/trim.php 6632 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6633 6634 position = None 
6635 collation = None 6636 expression = None 6637 6638 if self._match_texts(self.TRIM_TYPES): 6639 position = self._prev.text.upper() 6640 6641 this = self._parse_bitwise() 6642 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6643 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6644 expression = self._parse_bitwise() 6645 6646 if invert_order: 6647 this, expression = expression, this 6648 6649 if self._match(TokenType.COLLATE): 6650 collation = self._parse_bitwise() 6651 6652 return self.expression( 6653 exp.Trim, this=this, position=position, expression=expression, collation=collation 6654 ) 6655 6656 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6657 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6658 6659 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6660 return self._parse_window(self._parse_id_var(), alias=True) 6661 6662 def _parse_respect_or_ignore_nulls( 6663 self, this: t.Optional[exp.Expression] 6664 ) -> t.Optional[exp.Expression]: 6665 if self._match_text_seq("IGNORE", "NULLS"): 6666 return self.expression(exp.IgnoreNulls, this=this) 6667 if self._match_text_seq("RESPECT", "NULLS"): 6668 return self.expression(exp.RespectNulls, this=this) 6669 return this 6670 6671 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6672 if self._match(TokenType.HAVING): 6673 self._match_texts(("MAX", "MIN")) 6674 max = self._prev.text.upper() != "MIN" 6675 return self.expression( 6676 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6677 ) 6678 6679 return this 6680 6681 def _parse_window( 6682 self, this: t.Optional[exp.Expression], alias: bool = False 6683 ) -> t.Optional[exp.Expression]: 6684 func = this 6685 comments = func.comments if isinstance(func, exp.Expression) else None 6686 6687 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
6688 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6689 if self._match_text_seq("WITHIN", "GROUP"): 6690 order = self._parse_wrapped(self._parse_order) 6691 this = self.expression(exp.WithinGroup, this=this, expression=order) 6692 6693 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6694 self._match(TokenType.WHERE) 6695 this = self.expression( 6696 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6697 ) 6698 self._match_r_paren() 6699 6700 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6701 # Some dialects choose to implement and some do not. 6702 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6703 6704 # There is some code above in _parse_lambda that handles 6705 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6706 6707 # The below changes handle 6708 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6709 6710 # Oracle allows both formats 6711 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6712 # and Snowflake chose to do the same for familiarity 6713 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6714 if isinstance(this, exp.AggFunc): 6715 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6716 6717 if ignore_respect and ignore_respect is not this: 6718 ignore_respect.replace(ignore_respect.this) 6719 this = self.expression(ignore_respect.__class__, this=this) 6720 6721 this = self._parse_respect_or_ignore_nulls(this) 6722 6723 # bigquery select from window x AS (partition by ...) 
6724 if alias: 6725 over = None 6726 self._match(TokenType.ALIAS) 6727 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6728 return this 6729 else: 6730 over = self._prev.text.upper() 6731 6732 if comments and isinstance(func, exp.Expression): 6733 func.pop_comments() 6734 6735 if not self._match(TokenType.L_PAREN): 6736 return self.expression( 6737 exp.Window, 6738 comments=comments, 6739 this=this, 6740 alias=self._parse_id_var(False), 6741 over=over, 6742 ) 6743 6744 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6745 6746 first = self._match(TokenType.FIRST) 6747 if self._match_text_seq("LAST"): 6748 first = False 6749 6750 partition, order = self._parse_partition_and_order() 6751 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6752 6753 if kind: 6754 self._match(TokenType.BETWEEN) 6755 start = self._parse_window_spec() 6756 self._match(TokenType.AND) 6757 end = self._parse_window_spec() 6758 6759 spec = self.expression( 6760 exp.WindowSpec, 6761 kind=kind, 6762 start=start["value"], 6763 start_side=start["side"], 6764 end=end["value"], 6765 end_side=end["side"], 6766 ) 6767 else: 6768 spec = None 6769 6770 self._match_r_paren() 6771 6772 window = self.expression( 6773 exp.Window, 6774 comments=comments, 6775 this=this, 6776 partition_by=partition, 6777 order=order, 6778 spec=spec, 6779 alias=window_alias, 6780 over=over, 6781 first=first, 6782 ) 6783 6784 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
        # Chained window: recurse so e.g. `agg KEEP (...) OVER (...)` nests windows.
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: UNBOUNDED, CURRENT ROW, or an expression,
        optionally followed by a side keyword (e.g. PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an (optionally explicit, i.e. AS-prefixed) alias for `this`.

        Returns `this` unchanged when no alias is present, an exp.Aliases for a
        parenthesized alias list, or an exp.Alias node otherwise.
        """
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier.

        Falls back to consuming any non-reserved token when `any_token` is True,
        or any token in `tokens` (default: self.ID_VAR_TOKENS) otherwise.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via STRING_PARSERS, else try a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier (e.g. 'foo' AS alias)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via NUMERIC_PARSERS, else try a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, else try a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved token / token in `tokens`) into exp.Var.

        When `upper` is True the variable text is uppercased.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved
        (reserved tokens are consumed too when `ignore_reserved` is True)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds one token if the parser matched a
        placeholder token type but produced nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `KEYWORD expr` or `KEYWORD (expr, ...)` (e.g. EXCEPT/REPLACE on *)."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a chain of binary operators: while the next token is a key of
        `expressions`, wrap the accumulated node in the mapped expression type."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Parse `( ... )`; the parentheses may be omitted entirely when `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or fall back to a (possibly aliased) expression with
        trailing set operations applied."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] with optional
        comma-separated transaction modes (each mode a run of VAR tokens)."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] id]
        [AND [NO] CHAIN]; the verb itself was already consumed (see self._prev)."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        # FIRST/AFTER <col>: explicit column placement (Databricks/MySQL style)
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when _parse_drop didn't set one
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <action>."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: [SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse RENAME COLUMN old TO new, or RENAME TO <table>."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET ..."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse ALTER <alterable> ...; falls back to an opaque exp.Command when
        the target or its actions cannot be fully parsed."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only produce exp.Alter when all tokens were consumed; otherwise
            # treat the statement as an unsupported command.
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse ANALYZE in its many dialect variants (DuckDB, MySQL, StarRocks,
        Presto, Oracle, ...)."""
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # self._index - 2 points back at the SYNC/ASYNC token just consumed
            mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        """Parse the COMPUTE/ESTIMATE STATISTICS clause of ANALYZE TABLE."""
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            # NOTE(review): _match_texts does `text in texts`, so passing the bare
            # string "COLUMNS" is a substring test (e.g. "COL" would match) — a
            # one-element tuple would be stricter; confirm intended behavior.
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        """Parse Oracle's ANALYZE ... VALIDATE REF UPDATE / VALIDATE STRUCTURE."""
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None
    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse ANALYZE ... UPDATE/DROP HISTOGRAM ON cols [WITH ...] [USING DATA]."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target [AS alias] USING source ON cond WHEN ... ."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None when absent
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item as `left = right` (or `left TO right`)."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET <items>; falls back to exp.Command when tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Match a (possibly multi-word) option from `options`, returning it as exp.Var.

        Each option maps to allowed continuation keyword sequences; an option with
        continuations must be followed by one of them, or parsing fails/rewinds.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched: unknown option, or a known one missing its
            # required continuation (continuations is None when option is unknown).
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse `(<kind> [(key value, ...)])` into exp.DictProperty."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `([MIN m] MAX x)` into exp.DictRange (MIN defaults to 0)."""
        # NOTE(review): the locals `min`/`max` shadow builtins; harmless here but
        # renaming would be cleaner.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<expr> IN <iterator> [IF <cond>]` (e.g. DuckDB list comprehensions)."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted heredoc: $$...$$ or $tag$...$tag$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over the upcoming tokens to find the longest keyword match
        in `parsers`; rewinds and returns None when nothing matches."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True when the current token matches `token_type` (consuming it
        # unless advance=False and attaching pending comments to `expression`);
        # returns None otherwise.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in the collection `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; consumes both when advance=True.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive membership test of the current (non-string) token's
        # text in `texts`. Note: a plain string argument means substring matching.
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match an exact sequence of keyword texts; rewinds fully on any miss
        # (and also when advance=False, acting as pure lookahead).
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in `node` that refer to lambda parameters, optionally
        casting them to the parameter's declared type."""
        if not node:
            return node

lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7803 7804 for column in node.find_all(exp.Column): 7805 typ = lambda_types.get(column.parts[0].name) 7806 if typ is not None: 7807 dot_or_id = column.to_dot() if column.table else column.this 7808 7809 if typ: 7810 dot_or_id = self.expression( 7811 exp.Cast, 7812 this=dot_or_id, 7813 to=typ, 7814 ) 7815 7816 parent = column.parent 7817 7818 while isinstance(parent, exp.Dot): 7819 if not isinstance(parent.parent, exp.Dot): 7820 parent.replace(dot_or_id) 7821 break 7822 parent = parent.parent 7823 else: 7824 if column is node: 7825 node = dot_or_id 7826 else: 7827 column.replace(dot_or_id) 7828 return node 7829 7830 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7831 start = self._prev 7832 7833 # Not to be confused with TRUNCATE(number, decimals) function call 7834 if self._match(TokenType.L_PAREN): 7835 self._retreat(self._index - 2) 7836 return self._parse_function() 7837 7838 # Clickhouse supports TRUNCATE DATABASE as well 7839 is_database = self._match(TokenType.DATABASE) 7840 7841 self._match(TokenType.TABLE) 7842 7843 exists = self._parse_exists(not_=False) 7844 7845 expressions = self._parse_csv( 7846 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7847 ) 7848 7849 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7850 7851 if self._match_text_seq("RESTART", "IDENTITY"): 7852 identity = "RESTART" 7853 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7854 identity = "CONTINUE" 7855 else: 7856 identity = None 7857 7858 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7859 option = self._prev.text 7860 else: 7861 option = None 7862 7863 partition = self._parse_partition() 7864 7865 # Fallback case 7866 if self._curr: 7867 return self._parse_as_command(start) 7868 7869 return self.expression( 7870 exp.TruncateTable, 7871 expressions=expressions, 7872 is_database=is_database, 7873 
exists=exists, 7874 cluster=cluster, 7875 identity=identity, 7876 option=option, 7877 partition=partition, 7878 ) 7879 7880 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7881 this = self._parse_ordered(self._parse_opclass) 7882 7883 if not self._match(TokenType.WITH): 7884 return this 7885 7886 op = self._parse_var(any_token=True) 7887 7888 return self.expression(exp.WithOperator, this=this, op=op) 7889 7890 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7891 self._match(TokenType.EQ) 7892 self._match(TokenType.L_PAREN) 7893 7894 opts: t.List[t.Optional[exp.Expression]] = [] 7895 while self._curr and not self._match(TokenType.R_PAREN): 7896 if self._match_text_seq("FORMAT_NAME", "="): 7897 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7898 # so we parse it separately to use _parse_field() 7899 prop = self.expression( 7900 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7901 ) 7902 opts.append(prop) 7903 else: 7904 opts.append(self._parse_property()) 7905 7906 self._match(TokenType.COMMA) 7907 7908 return opts 7909 7910 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7911 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7912 7913 options = [] 7914 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7915 option = self._parse_var(any_token=True) 7916 prev = self._prev.text.upper() 7917 7918 # Different dialects might separate options and values by white space, "=" and "AS" 7919 self._match(TokenType.EQ) 7920 self._match(TokenType.ALIAS) 7921 7922 param = self.expression(exp.CopyParameter, this=option) 7923 7924 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7925 TokenType.L_PAREN, advance=False 7926 ): 7927 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7928 param.set("expressions", self._parse_wrapped_options()) 7929 elif prev == "FILE_FORMAT": 7930 # T-SQL's external file format case 7931 
param.set("expression", self._parse_field()) 7932 else: 7933 param.set("expression", self._parse_unquoted_field()) 7934 7935 options.append(param) 7936 self._match(sep) 7937 7938 return options 7939 7940 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7941 expr = self.expression(exp.Credentials) 7942 7943 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7944 expr.set("storage", self._parse_field()) 7945 if self._match_text_seq("CREDENTIALS"): 7946 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7947 creds = ( 7948 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7949 ) 7950 expr.set("credentials", creds) 7951 if self._match_text_seq("ENCRYPTION"): 7952 expr.set("encryption", self._parse_wrapped_options()) 7953 if self._match_text_seq("IAM_ROLE"): 7954 expr.set("iam_role", self._parse_field()) 7955 if self._match_text_seq("REGION"): 7956 expr.set("region", self._parse_field()) 7957 7958 return expr 7959 7960 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7961 return self._parse_field() 7962 7963 def _parse_copy(self) -> exp.Copy | exp.Command: 7964 start = self._prev 7965 7966 self._match(TokenType.INTO) 7967 7968 this = ( 7969 self._parse_select(nested=True, parse_subquery_alias=False) 7970 if self._match(TokenType.L_PAREN, advance=False) 7971 else self._parse_table(schema=True) 7972 ) 7973 7974 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7975 7976 files = self._parse_csv(self._parse_file_location) 7977 credentials = self._parse_credentials() 7978 7979 self._match_text_seq("WITH") 7980 7981 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7982 7983 # Fallback case 7984 if self._curr: 7985 return self._parse_as_command(start) 7986 7987 return self.expression( 7988 exp.Copy, 7989 this=this, 7990 kind=kind, 7991 credentials=credentials, 7992 files=files, 7993 params=params, 7994 ) 7995 7996 def _parse_normalize(self) -> 
exp.Normalize: 7997 return self.expression( 7998 exp.Normalize, 7999 this=self._parse_bitwise(), 8000 form=self._match(TokenType.COMMA) and self._parse_var(), 8001 ) 8002 8003 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8004 args = self._parse_csv(lambda: self._parse_lambda()) 8005 8006 this = seq_get(args, 0) 8007 decimals = seq_get(args, 1) 8008 8009 return expr_type( 8010 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8011 ) 8012 8013 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8014 if self._match_text_seq("COLUMNS", "(", advance=False): 8015 this = self._parse_function() 8016 if isinstance(this, exp.Columns): 8017 this.set("unpack", True) 8018 return this 8019 8020 return self.expression( 8021 exp.Star, 8022 **{ # type: ignore 8023 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8024 "replace": self._parse_star_op("REPLACE"), 8025 "rename": self._parse_star_op("RENAME"), 8026 }, 8027 ) 8028 8029 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8030 privilege_parts = [] 8031 8032 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8033 # (end of privilege list) or L_PAREN (start of column list) are met 8034 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8035 privilege_parts.append(self._curr.text.upper()) 8036 self._advance() 8037 8038 this = exp.var(" ".join(privilege_parts)) 8039 expressions = ( 8040 self._parse_wrapped_csv(self._parse_column) 8041 if self._match(TokenType.L_PAREN, advance=False) 8042 else None 8043 ) 8044 8045 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8046 8047 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8048 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8049 principal = self._parse_id_var() 8050 8051 if not principal: 8052 return None 8053 8054 return self.expression(exp.GrantPrincipal, 
this=principal, kind=kind) 8055 8056 def _parse_grant(self) -> exp.Grant | exp.Command: 8057 start = self._prev 8058 8059 privileges = self._parse_csv(self._parse_grant_privilege) 8060 8061 self._match(TokenType.ON) 8062 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8063 8064 # Attempt to parse the securable e.g. MySQL allows names 8065 # such as "foo.*", "*.*" which are not easily parseable yet 8066 securable = self._try_parse(self._parse_table_parts) 8067 8068 if not securable or not self._match_text_seq("TO"): 8069 return self._parse_as_command(start) 8070 8071 principals = self._parse_csv(self._parse_grant_principal) 8072 8073 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8074 8075 if self._curr: 8076 return self._parse_as_command(start) 8077 8078 return self.expression( 8079 exp.Grant, 8080 privileges=privileges, 8081 kind=kind, 8082 securable=securable, 8083 principals=principals, 8084 grant_option=grant_option, 8085 ) 8086 8087 def _parse_overlay(self) -> exp.Overlay: 8088 return self.expression( 8089 exp.Overlay, 8090 **{ # type: ignore 8091 "this": self._parse_bitwise(), 8092 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8093 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8094 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8095 }, 8096 )
27def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 28 if len(args) == 1 and args[0].is_star: 29 return exp.StarMap(this=args[0]) 30 31 keys = [] 32 values = [] 33 for i in range(0, len(args), 2): 34 keys.append(args[i]) 35 values.append(args[i + 1]) 36 37 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
45def binary_range_parser( 46 expr_type: t.Type[exp.Expression], reverse_args: bool = False 47) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 48 def _parse_binary_range( 49 self: Parser, this: t.Optional[exp.Expression] 50 ) -> t.Optional[exp.Expression]: 51 expression = self._parse_bitwise() 52 if reverse_args: 53 this, expression = expression, this 54 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 55 56 return _parse_binary_range
59def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 60 # Default argument order is base, expression 61 this = seq_get(args, 0) 62 expression = seq_get(args, 1) 63 64 if expression: 65 if not dialect.LOG_BASE_FIRST: 66 this, expression = expression, this 67 return exp.Log(this=this, expression=expression) 68 69 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
89def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 90 def _builder(args: t.List, dialect: Dialect) -> E: 91 expression = expr_type( 92 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 93 ) 94 if len(args) > 2 and expr_type is exp.JSONExtract: 95 expression.set("expressions", args[2:]) 96 97 return expression 98 99 return _builder
102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression)
122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp
133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args)
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: 
exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.BLOB, 332 TokenType.MEDIUMBLOB, 333 TokenType.LONGBLOB, 334 TokenType.BINARY, 335 TokenType.VARBINARY, 336 TokenType.JSON, 337 TokenType.JSONB, 338 TokenType.INTERVAL, 339 TokenType.TINYBLOB, 340 TokenType.TINYTEXT, 341 TokenType.TIME, 342 TokenType.TIMETZ, 343 TokenType.TIMESTAMP, 344 TokenType.TIMESTAMP_S, 345 TokenType.TIMESTAMP_MS, 346 TokenType.TIMESTAMP_NS, 347 TokenType.TIMESTAMPTZ, 348 TokenType.TIMESTAMPLTZ, 349 TokenType.TIMESTAMPNTZ, 350 TokenType.DATETIME, 351 TokenType.DATETIME2, 352 TokenType.DATETIME64, 353 TokenType.SMALLDATETIME, 
354 TokenType.DATE, 355 TokenType.DATE32, 356 TokenType.INT4RANGE, 357 TokenType.INT4MULTIRANGE, 358 TokenType.INT8RANGE, 359 TokenType.INT8MULTIRANGE, 360 TokenType.NUMRANGE, 361 TokenType.NUMMULTIRANGE, 362 TokenType.TSRANGE, 363 TokenType.TSMULTIRANGE, 364 TokenType.TSTZRANGE, 365 TokenType.TSTZMULTIRANGE, 366 TokenType.DATERANGE, 367 TokenType.DATEMULTIRANGE, 368 TokenType.DECIMAL, 369 TokenType.DECIMAL32, 370 TokenType.DECIMAL64, 371 TokenType.DECIMAL128, 372 TokenType.DECIMAL256, 373 TokenType.UDECIMAL, 374 TokenType.BIGDECIMAL, 375 TokenType.UUID, 376 TokenType.GEOGRAPHY, 377 TokenType.GEOMETRY, 378 TokenType.POINT, 379 TokenType.RING, 380 TokenType.LINESTRING, 381 TokenType.MULTILINESTRING, 382 TokenType.POLYGON, 383 TokenType.MULTIPOLYGON, 384 TokenType.HLLSKETCH, 385 TokenType.HSTORE, 386 TokenType.PSEUDO_TYPE, 387 TokenType.SUPER, 388 TokenType.SERIAL, 389 TokenType.SMALLSERIAL, 390 TokenType.BIGSERIAL, 391 TokenType.XML, 392 TokenType.YEAR, 393 TokenType.USERDEFINED, 394 TokenType.MONEY, 395 TokenType.SMALLMONEY, 396 TokenType.ROWVERSION, 397 TokenType.IMAGE, 398 TokenType.VARIANT, 399 TokenType.VECTOR, 400 TokenType.OBJECT, 401 TokenType.OBJECT_IDENTIFIER, 402 TokenType.INET, 403 TokenType.IPADDRESS, 404 TokenType.IPPREFIX, 405 TokenType.IPV4, 406 TokenType.IPV6, 407 TokenType.UNKNOWN, 408 TokenType.NULL, 409 TokenType.NAME, 410 TokenType.TDIGEST, 411 TokenType.DYNAMIC, 412 *ENUM_TYPE_TOKENS, 413 *NESTED_TYPE_TOKENS, 414 *AGGREGATE_TYPE_TOKENS, 415 } 416 417 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 418 TokenType.BIGINT: TokenType.UBIGINT, 419 TokenType.INT: TokenType.UINT, 420 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 421 TokenType.SMALLINT: TokenType.USMALLINT, 422 TokenType.TINYINT: TokenType.UTINYINT, 423 TokenType.DECIMAL: TokenType.UDECIMAL, 424 TokenType.DOUBLE: TokenType.UDOUBLE, 425 } 426 427 SUBQUERY_PREDICATES = { 428 TokenType.ANY: exp.Any, 429 TokenType.ALL: exp.All, 430 TokenType.EXISTS: exp.Exists, 431 TokenType.SOME: exp.Any, 432 } 433 434 
RESERVED_TOKENS = { 435 *Tokenizer.SINGLE_TOKENS.values(), 436 TokenType.SELECT, 437 } - {TokenType.IDENTIFIER} 438 439 DB_CREATABLES = { 440 TokenType.DATABASE, 441 TokenType.DICTIONARY, 442 TokenType.MODEL, 443 TokenType.NAMESPACE, 444 TokenType.SCHEMA, 445 TokenType.SEQUENCE, 446 TokenType.SINK, 447 TokenType.SOURCE, 448 TokenType.STORAGE_INTEGRATION, 449 TokenType.STREAMLIT, 450 TokenType.TABLE, 451 TokenType.TAG, 452 TokenType.VIEW, 453 TokenType.WAREHOUSE, 454 } 455 456 CREATABLES = { 457 TokenType.COLUMN, 458 TokenType.CONSTRAINT, 459 TokenType.FOREIGN_KEY, 460 TokenType.FUNCTION, 461 TokenType.INDEX, 462 TokenType.PROCEDURE, 463 *DB_CREATABLES, 464 } 465 466 ALTERABLES = { 467 TokenType.INDEX, 468 TokenType.TABLE, 469 TokenType.VIEW, 470 } 471 472 # Tokens that can represent identifiers 473 ID_VAR_TOKENS = { 474 TokenType.ALL, 475 TokenType.ATTACH, 476 TokenType.VAR, 477 TokenType.ANTI, 478 TokenType.APPLY, 479 TokenType.ASC, 480 TokenType.ASOF, 481 TokenType.AUTO_INCREMENT, 482 TokenType.BEGIN, 483 TokenType.BPCHAR, 484 TokenType.CACHE, 485 TokenType.CASE, 486 TokenType.COLLATE, 487 TokenType.COMMAND, 488 TokenType.COMMENT, 489 TokenType.COMMIT, 490 TokenType.CONSTRAINT, 491 TokenType.COPY, 492 TokenType.CUBE, 493 TokenType.CURRENT_SCHEMA, 494 TokenType.DEFAULT, 495 TokenType.DELETE, 496 TokenType.DESC, 497 TokenType.DESCRIBE, 498 TokenType.DETACH, 499 TokenType.DICTIONARY, 500 TokenType.DIV, 501 TokenType.END, 502 TokenType.EXECUTE, 503 TokenType.EXPORT, 504 TokenType.ESCAPE, 505 TokenType.FALSE, 506 TokenType.FIRST, 507 TokenType.FILTER, 508 TokenType.FINAL, 509 TokenType.FORMAT, 510 TokenType.FULL, 511 TokenType.IDENTIFIER, 512 TokenType.IS, 513 TokenType.ISNULL, 514 TokenType.INTERVAL, 515 TokenType.KEEP, 516 TokenType.KILL, 517 TokenType.LEFT, 518 TokenType.LIMIT, 519 TokenType.LOAD, 520 TokenType.MERGE, 521 TokenType.NATURAL, 522 TokenType.NEXT, 523 TokenType.OFFSET, 524 TokenType.OPERATOR, 525 TokenType.ORDINALITY, 526 TokenType.OVERLAPS, 527 
TokenType.OVERWRITE, 528 TokenType.PARTITION, 529 TokenType.PERCENT, 530 TokenType.PIVOT, 531 TokenType.PRAGMA, 532 TokenType.PUT, 533 TokenType.RANGE, 534 TokenType.RECURSIVE, 535 TokenType.REFERENCES, 536 TokenType.REFRESH, 537 TokenType.RENAME, 538 TokenType.REPLACE, 539 TokenType.RIGHT, 540 TokenType.ROLLUP, 541 TokenType.ROW, 542 TokenType.ROWS, 543 TokenType.SEMI, 544 TokenType.SET, 545 TokenType.SETTINGS, 546 TokenType.SHOW, 547 TokenType.TEMPORARY, 548 TokenType.TOP, 549 TokenType.TRUE, 550 TokenType.TRUNCATE, 551 TokenType.UNIQUE, 552 TokenType.UNNEST, 553 TokenType.UNPIVOT, 554 TokenType.UPDATE, 555 TokenType.USE, 556 TokenType.VOLATILE, 557 TokenType.WINDOW, 558 *CREATABLES, 559 *SUBQUERY_PREDICATES, 560 *TYPE_TOKENS, 561 *NO_PAREN_FUNCTIONS, 562 } 563 ID_VAR_TOKENS.remove(TokenType.UNION) 564 565 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 566 TokenType.ANTI, 567 TokenType.APPLY, 568 TokenType.ASOF, 569 TokenType.FULL, 570 TokenType.LEFT, 571 TokenType.LOCK, 572 TokenType.NATURAL, 573 TokenType.RIGHT, 574 TokenType.SEMI, 575 TokenType.WINDOW, 576 } 577 578 ALIAS_TOKENS = ID_VAR_TOKENS 579 580 ARRAY_CONSTRUCTORS = { 581 "ARRAY": exp.Array, 582 "LIST": exp.List, 583 } 584 585 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 586 587 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 588 589 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 590 591 FUNC_TOKENS = { 592 TokenType.COLLATE, 593 TokenType.COMMAND, 594 TokenType.CURRENT_DATE, 595 TokenType.CURRENT_DATETIME, 596 TokenType.CURRENT_SCHEMA, 597 TokenType.CURRENT_TIMESTAMP, 598 TokenType.CURRENT_TIME, 599 TokenType.CURRENT_USER, 600 TokenType.FILTER, 601 TokenType.FIRST, 602 TokenType.FORMAT, 603 TokenType.GLOB, 604 TokenType.IDENTIFIER, 605 TokenType.INDEX, 606 TokenType.ISNULL, 607 TokenType.ILIKE, 608 TokenType.INSERT, 609 TokenType.LIKE, 610 TokenType.MERGE, 611 TokenType.NEXT, 612 TokenType.OFFSET, 613 TokenType.PRIMARY_KEY, 614 TokenType.RANGE, 615 TokenType.REPLACE, 616 
TokenType.RLIKE, 617 TokenType.ROW, 618 TokenType.UNNEST, 619 TokenType.VAR, 620 TokenType.LEFT, 621 TokenType.RIGHT, 622 TokenType.SEQUENCE, 623 TokenType.DATE, 624 TokenType.DATETIME, 625 TokenType.TABLE, 626 TokenType.TIMESTAMP, 627 TokenType.TIMESTAMPTZ, 628 TokenType.TRUNCATE, 629 TokenType.WINDOW, 630 TokenType.XOR, 631 *TYPE_TOKENS, 632 *SUBQUERY_PREDICATES, 633 } 634 635 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 636 TokenType.AND: exp.And, 637 } 638 639 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 640 TokenType.COLON_EQ: exp.PropertyEQ, 641 } 642 643 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 644 TokenType.OR: exp.Or, 645 } 646 647 EQUALITY = { 648 TokenType.EQ: exp.EQ, 649 TokenType.NEQ: exp.NEQ, 650 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 651 } 652 653 COMPARISON = { 654 TokenType.GT: exp.GT, 655 TokenType.GTE: exp.GTE, 656 TokenType.LT: exp.LT, 657 TokenType.LTE: exp.LTE, 658 } 659 660 BITWISE = { 661 TokenType.AMP: exp.BitwiseAnd, 662 TokenType.CARET: exp.BitwiseXor, 663 TokenType.PIPE: exp.BitwiseOr, 664 } 665 666 TERM = { 667 TokenType.DASH: exp.Sub, 668 TokenType.PLUS: exp.Add, 669 TokenType.MOD: exp.Mod, 670 TokenType.COLLATE: exp.Collate, 671 } 672 673 FACTOR = { 674 TokenType.DIV: exp.IntDiv, 675 TokenType.LR_ARROW: exp.Distance, 676 TokenType.SLASH: exp.Div, 677 TokenType.STAR: exp.Mul, 678 } 679 680 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 681 682 TIMES = { 683 TokenType.TIME, 684 TokenType.TIMETZ, 685 } 686 687 TIMESTAMPS = { 688 TokenType.TIMESTAMP, 689 TokenType.TIMESTAMPTZ, 690 TokenType.TIMESTAMPLTZ, 691 *TIMES, 692 } 693 694 SET_OPERATIONS = { 695 TokenType.UNION, 696 TokenType.INTERSECT, 697 TokenType.EXCEPT, 698 } 699 700 JOIN_METHODS = { 701 TokenType.ASOF, 702 TokenType.NATURAL, 703 TokenType.POSITIONAL, 704 } 705 706 JOIN_SIDES = { 707 TokenType.LEFT, 708 TokenType.RIGHT, 709 TokenType.FULL, 710 } 711 712 JOIN_KINDS = { 713 TokenType.ANTI, 714 TokenType.CROSS, 715 
TokenType.INNER, 716 TokenType.OUTER, 717 TokenType.SEMI, 718 TokenType.STRAIGHT_JOIN, 719 } 720 721 JOIN_HINTS: t.Set[str] = set() 722 723 LAMBDAS = { 724 TokenType.ARROW: lambda self, expressions: self.expression( 725 exp.Lambda, 726 this=self._replace_lambda( 727 self._parse_assignment(), 728 expressions, 729 ), 730 expressions=expressions, 731 ), 732 TokenType.FARROW: lambda self, expressions: self.expression( 733 exp.Kwarg, 734 this=exp.var(expressions[0].name), 735 expression=self._parse_assignment(), 736 ), 737 } 738 739 COLUMN_OPERATORS = { 740 TokenType.DOT: None, 741 TokenType.DOTCOLON: lambda self, this, to: self.expression( 742 exp.JSONCast, 743 this=this, 744 to=to, 745 ), 746 TokenType.DCOLON: lambda self, this, to: self.expression( 747 exp.Cast if self.STRICT_CAST else exp.TryCast, 748 this=this, 749 to=to, 750 ), 751 TokenType.ARROW: lambda self, this, path: self.expression( 752 exp.JSONExtract, 753 this=this, 754 expression=self.dialect.to_json_path(path), 755 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 756 ), 757 TokenType.DARROW: lambda self, this, path: self.expression( 758 exp.JSONExtractScalar, 759 this=this, 760 expression=self.dialect.to_json_path(path), 761 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 762 ), 763 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 764 exp.JSONBExtract, 765 this=this, 766 expression=path, 767 ), 768 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 769 exp.JSONBExtractScalar, 770 this=this, 771 expression=path, 772 ), 773 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 774 exp.JSONBContains, 775 this=this, 776 expression=key, 777 ), 778 } 779 780 EXPRESSION_PARSERS = { 781 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 782 exp.Column: lambda self: self._parse_column(), 783 exp.Condition: lambda self: self._parse_assignment(), 784 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 785 
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps the token that begins a SQL statement to the method that parses it.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    # Prefix (unary) operator tokens and how to build their AST nodes.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # String-literal token types; each callable receives the parser and the matched token.
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Numeric-literal token types; same (parser, token) signature as STRING_PARSERS.
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # All primary (leaf) expression parsers: strings, numbers and the singleton literals.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Bind-parameter / placeholder syntaxes (?, @param, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Infix range/predicate operators (BETWEEN, IN, LIKE, IS, ...); callables take
    # the parser and the already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Keyword-keyed parsers for properties appearing in DDL (CREATE/ALTER) statements.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword-keyed parsers for column/table constraints in CREATE/ALTER statements.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Keyword-keyed parsers for the action clause of ALTER TABLE statements.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    # Parsers for the keyword following ALTER TABLE ... ALTER.
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    # Function-like keywords that are parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions whose argument lists need bespoke parsing (non-standard syntax).
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Maps a query-modifier token to a (modifier arg name, parsed node) pair.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Parsers for the keyword following a SET statement.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            # NOTE(review): "UNCOMITTED" looks misspelled — the SQL keyword is
            # UNCOMMITTED. Verify against the dialects/tests before changing.
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Resolve the dialect eagerly so parse-time lookups are cheap; the import
        # is local to avoid a circular import with sqlglot.dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        # Clears all per-parse state so the instance can be reused.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1504 """ 1505 return self._parse( 1506 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1507 ) 1508 1509 def parse_into( 1510 self, 1511 expression_types: exp.IntoType, 1512 raw_tokens: t.List[Token], 1513 sql: t.Optional[str] = None, 1514 ) -> t.List[t.Optional[exp.Expression]]: 1515 """ 1516 Parses a list of tokens into a given Expression type. If a collection of Expression 1517 types is given instead, this method will try to parse the token list into each one 1518 of them, stopping at the first for which the parsing succeeds. 1519 1520 Args: 1521 expression_types: The expression type(s) to try and parse the token list into. 1522 raw_tokens: The list of tokens. 1523 sql: The original SQL string, used to produce helpful debug messages. 1524 1525 Returns: 1526 The target Expression. 1527 """ 1528 errors = [] 1529 for expression_type in ensure_list(expression_types): 1530 parser = self.EXPRESSION_PARSERS.get(expression_type) 1531 if not parser: 1532 raise TypeError(f"No parser registered for {expression_type}") 1533 1534 try: 1535 return self._parse(parser, raw_tokens, sql) 1536 except ParseError as e: 1537 e.errors[0]["into_expression"] = expression_type 1538 errors.append(e) 1539 1540 raise ParseError( 1541 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1542 errors=merge_errors(errors), 1543 ) from errors[-1] 1544 1545 def _parse( 1546 self, 1547 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1548 raw_tokens: t.List[Token], 1549 sql: t.Optional[str] = None, 1550 ) -> t.List[t.Optional[exp.Expression]]: 1551 self.reset() 1552 self.sql = sql or "" 1553 1554 total = len(raw_tokens) 1555 chunks: t.List[t.List[Token]] = [[]] 1556 1557 for i, token in enumerate(raw_tokens): 1558 if token.token_type == TokenType.SEMICOLON: 1559 if token.comments: 1560 chunks.append([token]) 1561 1562 if i < total - 1: 1563 chunks.append([]) 1564 else: 1565 chunks[-1].append(token) 1566 1567 expressions = [] 1568 1569 for 
tokens in chunks: 1570 self._index = -1 1571 self._tokens = tokens 1572 self._advance() 1573 1574 expressions.append(parse_method(self)) 1575 1576 if self._index < len(self._tokens): 1577 self.raise_error("Invalid expression / Unexpected token") 1578 1579 self.check_errors() 1580 1581 return expressions 1582 1583 def check_errors(self) -> None: 1584 """Logs or raises any found errors, depending on the chosen error level setting.""" 1585 if self.error_level == ErrorLevel.WARN: 1586 for error in self.errors: 1587 logger.error(str(error)) 1588 elif self.error_level == ErrorLevel.RAISE and self.errors: 1589 raise ParseError( 1590 concat_messages(self.errors, self.max_errors), 1591 errors=merge_errors(self.errors), 1592 ) 1593 1594 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1595 """ 1596 Appends an error in the list of recorded errors or raises it, depending on the chosen 1597 error level setting. 1598 """ 1599 token = token or self._curr or self._prev or Token.string("") 1600 start = token.start 1601 end = token.end + 1 1602 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1603 highlight = self.sql[start:end] 1604 end_context = self.sql[end : end + self.error_message_context] 1605 1606 error = ParseError.new( 1607 f"{message}. Line {token.line}, Col: {token.col}.\n" 1608 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1609 description=message, 1610 line=token.line, 1611 col=token.col, 1612 start_context=start_context, 1613 highlight=highlight, 1614 end_context=end_context, 1615 ) 1616 1617 if self.error_level == ErrorLevel.IMMEDIATE: 1618 raise error 1619 1620 self.errors.append(error) 1621 1622 def expression( 1623 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1624 ) -> E: 1625 """ 1626 Creates a new, validated Expression. 1627 1628 Args: 1629 exp_class: The expression class to instantiate. 1630 comments: An optional list of comments to attach to the expression. 
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Side-effecting conditional expression: attach the explicitly supplied
        # comments if any, otherwise fall back to the buffered token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Move the comments buffered from the previously consumed token onto
        # `expression`, then clear the buffer so they are only attached once.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        # ErrorLevel.IGNORE skips validation entirely; other levels surface each
        # problem through raise_error (which may warn, collect or raise).
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the raw SQL text spanned by the two tokens (inclusive of `end`).
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source
        # text, i.e. not separated by any whitespace.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Move the token cursor forward (or backward, via _retreat's negative
        # delta) and refresh the _curr/_next/_prev/_prev_comments views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Jump back to an earlier (or any) saved token index; implemented on top
        # of _advance so all cursor bookkeeping stays in one place.
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        # Nothing to warn about for empty/trivial token streams.
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks,
and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Last-resort parse: wrap the remaining statement in a generic Command
        # node (keyword + raw string), warning that the syntax is unsupported.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any validation problem raises here instead of
        # being collected/warned, making the failure catchable below.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Restore the cursor on failure (or unconditionally if requested),
            # and always restore the caller's error level.
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <target> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: fall back to a raw Command.
            return self._parse_as_command(start)

        # The target is parsed according to the object kind being commented on.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this
= self._parse_id_var() 1745 1746 self._match(TokenType.IS) 1747 1748 return self.expression( 1749 exp.Comment, 1750 this=this, 1751 kind=kind.text, 1752 expression=self._parse_string(), 1753 exists=exists, 1754 materialized=materialized, 1755 ) 1756 1757 def _parse_to_table( 1758 self, 1759 ) -> exp.ToTableProperty: 1760 table = self._parse_table_parts(schema=True) 1761 return self.expression(exp.ToTableProperty, this=table) 1762 1763 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1764 def _parse_ttl(self) -> exp.Expression: 1765 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1766 this = self._parse_bitwise() 1767 1768 if self._match_text_seq("DELETE"): 1769 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1770 if self._match_text_seq("RECOMPRESS"): 1771 return self.expression( 1772 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1773 ) 1774 if self._match_text_seq("TO", "DISK"): 1775 return self.expression( 1776 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1777 ) 1778 if self._match_text_seq("TO", "VOLUME"): 1779 return self.expression( 1780 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1781 ) 1782 1783 return this 1784 1785 expressions = self._parse_csv(_parse_ttl_action) 1786 where = self._parse_where() 1787 group = self._parse_group() 1788 1789 aggregates = None 1790 if group and self._match(TokenType.SET): 1791 aggregates = self._parse_csv(self._parse_set_item) 1792 1793 return self.expression( 1794 exp.MergeTreeTTL, 1795 expressions=expressions, 1796 where=where, 1797 group=group, 1798 aggregates=aggregates, 1799 ) 1800 1801 def _parse_statement(self) -> t.Optional[exp.Expression]: 1802 if self._curr is None: 1803 return None 1804 1805 if self._match_set(self.STATEMENT_PARSERS): 1806 comments = self._prev_comments 1807 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1808 stmt.add_comments(comments, 
prepend=True) 1809 return stmt 1810 1811 if self._match_set(self.dialect.tokenizer.COMMANDS): 1812 return self._parse_command() 1813 1814 expression = self._parse_expression() 1815 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1816 return self._parse_query_modifiers(expression) 1817 1818 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1819 start = self._prev 1820 temporary = self._match(TokenType.TEMPORARY) 1821 materialized = self._match_text_seq("MATERIALIZED") 1822 1823 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1824 if not kind: 1825 return self._parse_as_command(start) 1826 1827 concurrently = self._match_text_seq("CONCURRENTLY") 1828 if_exists = exists or self._parse_exists() 1829 1830 if kind == "COLUMN": 1831 this = self._parse_column() 1832 else: 1833 this = self._parse_table_parts( 1834 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1835 ) 1836 1837 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1838 1839 if self._match(TokenType.L_PAREN, advance=False): 1840 expressions = self._parse_wrapped_csv(self._parse_types) 1841 else: 1842 expressions = None 1843 1844 return self.expression( 1845 exp.Drop, 1846 exists=if_exists, 1847 this=this, 1848 expressions=expressions, 1849 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1850 temporary=temporary, 1851 materialized=materialized, 1852 cascade=self._match_text_seq("CASCADE"), 1853 constraints=self._match_text_seq("CONSTRAINTS"), 1854 purge=self._match_text_seq("PURGE"), 1855 cluster=cluster, 1856 concurrently=concurrently, 1857 ) 1858 1859 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1860 return ( 1861 self._match_text_seq("IF") 1862 and (not not_ or self._match(TokenType.NOT)) 1863 and self._match(TokenType.EXISTS) 1864 ) 1865 1866 def _parse_create(self) -> exp.Create | exp.Command: 1867 # Note: this can't be None because we've matched 
a statement parser
        start = self._prev

        # CREATE OR REPLACE and CREATE OR ALTER (or a leading REPLACE token)
        # all map to the `replace` flag on exp.Create.
        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # Tri-state columnstore flag: True for CLUSTERED COLUMNSTORE, False for
        # [NONCLUSTERED] COLUMNSTORE, None when no columnstore clause appears.
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        # Skip the TABLE keyword in CREATE TABLE FUNCTION so the FUNCTION token
        # becomes the create token below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            # Couldn't identify what is being created: fall back to a Command.
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several clause positions; merge each
            # newly parsed batch into the single accumulated Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            # AS <heredoc> style body, possibly followed by more properties.
            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    # BEGIN ... END / RETURN ... style function bodies.
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
1980 expression = self._parse_ddl_select() 1981 1982 if create_token.token_type == TokenType.TABLE: 1983 # exp.Properties.Location.POST_EXPRESSION 1984 extend_props(self._parse_properties()) 1985 1986 indexes = [] 1987 while True: 1988 index = self._parse_index() 1989 1990 # exp.Properties.Location.POST_INDEX 1991 extend_props(self._parse_properties()) 1992 if not index: 1993 break 1994 else: 1995 self._match(TokenType.COMMA) 1996 indexes.append(index) 1997 elif create_token.token_type == TokenType.VIEW: 1998 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1999 no_schema_binding = True 2000 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2001 extend_props(self._parse_properties()) 2002 2003 shallow = self._match_text_seq("SHALLOW") 2004 2005 if self._match_texts(self.CLONE_KEYWORDS): 2006 copy = self._prev.text.lower() == "copy" 2007 clone = self.expression( 2008 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2009 ) 2010 2011 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2012 return self._parse_as_command(start) 2013 2014 create_kind_text = create_token.text.upper() 2015 return self.expression( 2016 exp.Create, 2017 this=this, 2018 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2019 replace=replace, 2020 refresh=refresh, 2021 unique=unique, 2022 expression=expression, 2023 exists=exists, 2024 properties=properties, 2025 indexes=indexes, 2026 no_schema_binding=no_schema_binding, 2027 begin=begin, 2028 end=end, 2029 clone=clone, 2030 concurrently=concurrently, 2031 clustered=clustered, 2032 ) 2033 2034 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2035 seq = exp.SequenceProperties() 2036 2037 options = [] 2038 index = self._index 2039 2040 while self._curr: 2041 self._match(TokenType.COMMA) 2042 if self._match_text_seq("INCREMENT"): 2043 self._match_text_seq("BY") 2044 self._match_text_seq("=") 
2045 seq.set("increment", self._parse_term()) 2046 elif self._match_text_seq("MINVALUE"): 2047 seq.set("minvalue", self._parse_term()) 2048 elif self._match_text_seq("MAXVALUE"): 2049 seq.set("maxvalue", self._parse_term()) 2050 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2051 self._match_text_seq("=") 2052 seq.set("start", self._parse_term()) 2053 elif self._match_text_seq("CACHE"): 2054 # T-SQL allows empty CACHE which is initialized dynamically 2055 seq.set("cache", self._parse_number() or True) 2056 elif self._match_text_seq("OWNED", "BY"): 2057 # "OWNED BY NONE" is the default 2058 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2059 else: 2060 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2061 if opt: 2062 options.append(opt) 2063 else: 2064 break 2065 2066 seq.set("options", options if options else None) 2067 return None if self._index == index else seq 2068 2069 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2070 # only used for teradata currently 2071 self._match(TokenType.COMMA) 2072 2073 kwargs = { 2074 "no": self._match_text_seq("NO"), 2075 "dual": self._match_text_seq("DUAL"), 2076 "before": self._match_text_seq("BEFORE"), 2077 "default": self._match_text_seq("DEFAULT"), 2078 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2079 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2080 "after": self._match_text_seq("AFTER"), 2081 "minimum": self._match_texts(("MIN", "MINIMUM")), 2082 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2083 } 2084 2085 if self._match_texts(self.PROPERTY_PARSERS): 2086 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2087 try: 2088 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2089 except TypeError: 2090 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2091 2092 return None 2093 2094 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2095 return 
self._parse_wrapped_csv(self._parse_property) 2096 2097 def _parse_property(self) -> t.Optional[exp.Expression]: 2098 if self._match_texts(self.PROPERTY_PARSERS): 2099 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2100 2101 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2102 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2103 2104 if self._match_text_seq("COMPOUND", "SORTKEY"): 2105 return self._parse_sortkey(compound=True) 2106 2107 if self._match_text_seq("SQL", "SECURITY"): 2108 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2109 2110 index = self._index 2111 key = self._parse_column() 2112 2113 if not self._match(TokenType.EQ): 2114 self._retreat(index) 2115 return self._parse_sequence_properties() 2116 2117 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2118 if isinstance(key, exp.Column): 2119 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2120 2121 value = self._parse_bitwise() or self._parse_var(any_token=True) 2122 2123 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2124 if isinstance(value, exp.Column): 2125 value = exp.var(value.name) 2126 2127 return self.expression(exp.Property, this=key, value=value) 2128 2129 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2130 if self._match_text_seq("BY"): 2131 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2132 2133 self._match(TokenType.ALIAS) 2134 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2135 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2136 2137 return self.expression( 2138 exp.FileFormatProperty, 2139 this=( 2140 self.expression( 2141 exp.InputOutputFormat, 2142 input_format=input_format, 2143 output_format=output_format, 2144 ) 
2145 if input_format or output_format 2146 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2147 ), 2148 ) 2149 2150 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2151 field = self._parse_field() 2152 if isinstance(field, exp.Identifier) and not field.quoted: 2153 field = exp.var(field) 2154 2155 return field 2156 2157 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2158 self._match(TokenType.EQ) 2159 self._match(TokenType.ALIAS) 2160 2161 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2162 2163 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2164 properties = [] 2165 while True: 2166 if before: 2167 prop = self._parse_property_before() 2168 else: 2169 prop = self._parse_property() 2170 if not prop: 2171 break 2172 for p in ensure_list(prop): 2173 properties.append(p) 2174 2175 if properties: 2176 return self.expression(exp.Properties, expressions=properties) 2177 2178 return None 2179 2180 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2181 return self.expression( 2182 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2183 ) 2184 2185 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2186 if self._match_texts(("DEFINER", "INVOKER")): 2187 security_specifier = self._prev.text.upper() 2188 return self.expression(exp.SecurityProperty, this=security_specifier) 2189 return None 2190 2191 def _parse_settings_property(self) -> exp.SettingsProperty: 2192 return self.expression( 2193 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2194 ) 2195 2196 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2197 if self._index >= 2: 2198 pre_volatile_token = self._tokens[self._index - 2] 2199 else: 2200 pre_volatile_token = None 2201 2202 if pre_volatile_token and pre_volatile_token.token_type in 
self.PRE_VOLATILE_TOKENS: 2203 return exp.VolatileProperty() 2204 2205 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2206 2207 def _parse_retention_period(self) -> exp.Var: 2208 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2209 number = self._parse_number() 2210 number_str = f"{number} " if number else "" 2211 unit = self._parse_var(any_token=True) 2212 return exp.var(f"{number_str}{unit}") 2213 2214 def _parse_system_versioning_property( 2215 self, with_: bool = False 2216 ) -> exp.WithSystemVersioningProperty: 2217 self._match(TokenType.EQ) 2218 prop = self.expression( 2219 exp.WithSystemVersioningProperty, 2220 **{ # type: ignore 2221 "on": True, 2222 "with": with_, 2223 }, 2224 ) 2225 2226 if self._match_text_seq("OFF"): 2227 prop.set("on", False) 2228 return prop 2229 2230 self._match(TokenType.ON) 2231 if self._match(TokenType.L_PAREN): 2232 while self._curr and not self._match(TokenType.R_PAREN): 2233 if self._match_text_seq("HISTORY_TABLE", "="): 2234 prop.set("this", self._parse_table_parts()) 2235 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2236 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2237 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2238 prop.set("retention_period", self._parse_retention_period()) 2239 2240 self._match(TokenType.COMMA) 2241 2242 return prop 2243 2244 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2245 self._match(TokenType.EQ) 2246 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2247 prop = self.expression(exp.DataDeletionProperty, on=on) 2248 2249 if self._match(TokenType.L_PAREN): 2250 while self._curr and not self._match(TokenType.R_PAREN): 2251 if self._match_text_seq("FILTER_COLUMN", "="): 2252 prop.set("filter_column", self._parse_column()) 2253 elif self._match_text_seq("RETENTION_PERIOD", "="): 2254 prop.set("retention_period", 
self._parse_retention_period()) 2255 2256 self._match(TokenType.COMMA) 2257 2258 return prop 2259 2260 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2261 kind = "HASH" 2262 expressions: t.Optional[t.List[exp.Expression]] = None 2263 if self._match_text_seq("BY", "HASH"): 2264 expressions = self._parse_wrapped_csv(self._parse_id_var) 2265 elif self._match_text_seq("BY", "RANDOM"): 2266 kind = "RANDOM" 2267 2268 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2269 buckets: t.Optional[exp.Expression] = None 2270 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2271 buckets = self._parse_number() 2272 2273 return self.expression( 2274 exp.DistributedByProperty, 2275 expressions=expressions, 2276 kind=kind, 2277 buckets=buckets, 2278 order=self._parse_order(), 2279 ) 2280 2281 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2282 self._match_text_seq("KEY") 2283 expressions = self._parse_wrapped_id_vars() 2284 return self.expression(expr_type, expressions=expressions) 2285 2286 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2287 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2288 prop = self._parse_system_versioning_property(with_=True) 2289 self._match_r_paren() 2290 return prop 2291 2292 if self._match(TokenType.L_PAREN, advance=False): 2293 return self._parse_wrapped_properties() 2294 2295 if self._match_text_seq("JOURNAL"): 2296 return self._parse_withjournaltable() 2297 2298 if self._match_texts(self.VIEW_ATTRIBUTES): 2299 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2300 2301 if self._match_text_seq("DATA"): 2302 return self._parse_withdata(no=False) 2303 elif self._match_text_seq("NO", "DATA"): 2304 return self._parse_withdata(no=True) 2305 2306 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2307 return self._parse_serde_properties(with_=True) 2308 2309 if self._match(TokenType.SCHEMA): 
2310 return self.expression( 2311 exp.WithSchemaBindingProperty, 2312 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2313 ) 2314 2315 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2316 return self.expression( 2317 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2318 ) 2319 2320 if not self._next: 2321 return None 2322 2323 return self._parse_withisolatedloading() 2324 2325 def _parse_procedure_option(self) -> exp.Expression | None: 2326 if self._match_text_seq("EXECUTE", "AS"): 2327 return self.expression( 2328 exp.ExecuteAsProperty, 2329 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2330 or self._parse_string(), 2331 ) 2332 2333 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2334 2335 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2336 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2337 self._match(TokenType.EQ) 2338 2339 user = self._parse_id_var() 2340 self._match(TokenType.PARAMETER) 2341 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2342 2343 if not user or not host: 2344 return None 2345 2346 return exp.DefinerProperty(this=f"{user}@{host}") 2347 2348 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2349 self._match(TokenType.TABLE) 2350 self._match(TokenType.EQ) 2351 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2352 2353 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2354 return self.expression(exp.LogProperty, no=no) 2355 2356 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2357 return self.expression(exp.JournalProperty, **kwargs) 2358 2359 def _parse_checksum(self) -> exp.ChecksumProperty: 2360 self._match(TokenType.EQ) 2361 2362 on = None 2363 if self._match(TokenType.ON): 2364 on = True 2365 elif self._match_text_seq("OFF"): 2366 on = False 2367 2368 return self.expression(exp.ChecksumProperty, 
on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY <ordered exprs>, optionally parenthesized (`wrapped`).
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Only COPY GRANTS is a property here; otherwise give back the COPY
        # token that the property dispatcher already consumed.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE [=] <number> [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With "=" a numeric ratio follows; otherwise only the NO/DEFAULT
        # modifiers (passed in by the property dispatcher) apply.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # [DEFAULT|MINIMUM|MAXIMUM] DATABLOCKSIZE [=] <n> [BYTES|KBYTES|KILOBYTES];
        # the modifier flags are supplied by the property dispatcher.
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (schema)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # [NO] [CONCURRENT] ISOLATED LOADING [<target option>]; backtrack fully
        # if the mandatory ISOLATED LOADING keywords don't follow.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING <kind> [<target>] FOR|IN <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking takes no named target; the others name the locked object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
elif self._match(TokenType.IN): 2506 for_or_in = "IN" 2507 else: 2508 for_or_in = None 2509 2510 if self._match_text_seq("ACCESS"): 2511 lock_type = "ACCESS" 2512 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2513 lock_type = "EXCLUSIVE" 2514 elif self._match_text_seq("SHARE"): 2515 lock_type = "SHARE" 2516 elif self._match_text_seq("READ"): 2517 lock_type = "READ" 2518 elif self._match_text_seq("WRITE"): 2519 lock_type = "WRITE" 2520 elif self._match_text_seq("CHECKSUM"): 2521 lock_type = "CHECKSUM" 2522 else: 2523 lock_type = None 2524 2525 override = self._match_text_seq("OVERRIDE") 2526 2527 return self.expression( 2528 exp.LockingProperty, 2529 this=this, 2530 kind=kind, 2531 for_or_in=for_or_in, 2532 lock_type=lock_type, 2533 override=override, 2534 ) 2535 2536 def _parse_partition_by(self) -> t.List[exp.Expression]: 2537 if self._match(TokenType.PARTITION_BY): 2538 return self._parse_csv(self._parse_assignment) 2539 return [] 2540 2541 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2542 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2543 if self._match_text_seq("MINVALUE"): 2544 return exp.var("MINVALUE") 2545 if self._match_text_seq("MAXVALUE"): 2546 return exp.var("MAXVALUE") 2547 return self._parse_bitwise() 2548 2549 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2550 expression = None 2551 from_expressions = None 2552 to_expressions = None 2553 2554 if self._match(TokenType.IN): 2555 this = self._parse_wrapped_csv(self._parse_bitwise) 2556 elif self._match(TokenType.FROM): 2557 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2558 self._match_text_seq("TO") 2559 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2560 elif self._match_text_seq("WITH", "(", "MODULUS"): 2561 this = self._parse_number() 2562 self._match_text_seq(",", "REMAINDER") 2563 expression = self._parse_number() 2564 self._match_r_paren() 2565 else: 2566 self.raise_error("Failed to parse 
partition bound spec.") 2567 2568 return self.expression( 2569 exp.PartitionBoundSpec, 2570 this=this, 2571 expression=expression, 2572 from_expressions=from_expressions, 2573 to_expressions=to_expressions, 2574 ) 2575 2576 # https://www.postgresql.org/docs/current/sql-createtable.html 2577 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2578 if not self._match_text_seq("OF"): 2579 self._retreat(self._index - 1) 2580 return None 2581 2582 this = self._parse_table(schema=True) 2583 2584 if self._match(TokenType.DEFAULT): 2585 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2586 elif self._match_text_seq("FOR", "VALUES"): 2587 expression = self._parse_partition_bound_spec() 2588 else: 2589 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2590 2591 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2592 2593 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2594 self._match(TokenType.EQ) 2595 return self.expression( 2596 exp.PartitionedByProperty, 2597 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2598 ) 2599 2600 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2601 if self._match_text_seq("AND", "STATISTICS"): 2602 statistics = True 2603 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2604 statistics = False 2605 else: 2606 statistics = None 2607 2608 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2609 2610 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2611 if self._match_text_seq("SQL"): 2612 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2613 return None 2614 2615 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2616 if self._match_text_seq("SQL", "DATA"): 2617 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2618 return None 2619 2620 def _parse_no_property(self) -> 
t.Optional[exp.Expression]: 2621 if self._match_text_seq("PRIMARY", "INDEX"): 2622 return exp.NoPrimaryIndexProperty() 2623 if self._match_text_seq("SQL"): 2624 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2625 return None 2626 2627 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2628 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2629 return exp.OnCommitProperty() 2630 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2631 return exp.OnCommitProperty(delete=True) 2632 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2633 2634 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2635 if self._match_text_seq("SQL", "DATA"): 2636 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2637 return None 2638 2639 def _parse_distkey(self) -> exp.DistKeyProperty: 2640 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2641 2642 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2643 table = self._parse_table(schema=True) 2644 2645 options = [] 2646 while self._match_texts(("INCLUDING", "EXCLUDING")): 2647 this = self._prev.text.upper() 2648 2649 id_var = self._parse_id_var() 2650 if not id_var: 2651 return None 2652 2653 options.append( 2654 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2655 ) 2656 2657 return self.expression(exp.LikeProperty, this=table, expressions=options) 2658 2659 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2660 return self.expression( 2661 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2662 ) 2663 2664 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2665 self._match(TokenType.EQ) 2666 return self.expression( 2667 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2668 ) 2669 2670 def _parse_remote_with_connection(self) -> 
    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement.

        Handles an optional creatable kind (e.g. TABLE), an optional dialect
        style keyword, an optional FORMAT property, and either a nested
        statement or a table reference, followed by properties and an
        optional partition clause.
        """
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" token was actually the first part of a dotted table
            # name, so discard it and rewind to re-parse it as a table
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )
parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2732 if self._match(TokenType.WHEN): 2733 expression = self._parse_disjunction() 2734 self._match(TokenType.THEN) 2735 else: 2736 expression = None 2737 2738 else_ = self._match(TokenType.ELSE) 2739 2740 if not self._match(TokenType.INTO): 2741 return None 2742 2743 return self.expression( 2744 exp.ConditionalInsert, 2745 this=self.expression( 2746 exp.Insert, 2747 this=self._parse_table(schema=True), 2748 expression=self._parse_derived_table_values(), 2749 ), 2750 expression=expression, 2751 else_=else_, 2752 ) 2753 2754 expression = parse_conditional_insert() 2755 while expression is not None: 2756 expressions.append(expression) 2757 expression = parse_conditional_insert() 2758 2759 return self.expression( 2760 exp.MultitableInserts, 2761 kind=kind, 2762 comments=comments, 2763 expressions=expressions, 2764 source=self._parse_table(), 2765 ) 2766 2767 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2768 comments = [] 2769 hint = self._parse_hint() 2770 overwrite = self._match(TokenType.OVERWRITE) 2771 ignore = self._match(TokenType.IGNORE) 2772 local = self._match_text_seq("LOCAL") 2773 alternative = None 2774 is_function = None 2775 2776 if self._match_text_seq("DIRECTORY"): 2777 this: t.Optional[exp.Expression] = self.expression( 2778 exp.Directory, 2779 this=self._parse_var_or_string(), 2780 local=local, 2781 row_format=self._parse_row_format(match_row=True), 2782 ) 2783 else: 2784 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2785 comments += ensure_list(self._prev_comments) 2786 return self._parse_multitable_inserts(comments) 2787 2788 if self._match(TokenType.OR): 2789 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2790 2791 self._match(TokenType.INTO) 2792 comments += ensure_list(self._prev_comments) 2793 self._match(TokenType.TABLE) 2794 is_function = self._match(TokenType.FUNCTION) 2795 2796 this = ( 2797 
self._parse_table(schema=True, parse_partition=True) 2798 if not is_function 2799 else self._parse_function() 2800 ) 2801 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2802 this.set("alias", self._parse_table_alias()) 2803 2804 returning = self._parse_returning() 2805 2806 return self.expression( 2807 exp.Insert, 2808 comments=comments, 2809 hint=hint, 2810 is_function=is_function, 2811 this=this, 2812 stored=self._match_text_seq("STORED") and self._parse_stored(), 2813 by_name=self._match_text_seq("BY", "NAME"), 2814 exists=self._parse_exists(), 2815 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2816 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2817 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2818 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2819 conflict=self._parse_on_conflict(), 2820 returning=returning or self._parse_returning(), 2821 overwrite=overwrite, 2822 alternative=alternative, 2823 ignore=ignore, 2824 source=self._match(TokenType.TABLE) and self._parse_table(), 2825 ) 2826 2827 def _parse_kill(self) -> exp.Kill: 2828 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2829 2830 return self.expression( 2831 exp.Kill, 2832 this=self._parse_primary(), 2833 kind=kind, 2834 ) 2835 2836 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2837 conflict = self._match_text_seq("ON", "CONFLICT") 2838 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2839 2840 if not conflict and not duplicate: 2841 return None 2842 2843 conflict_keys = None 2844 constraint = None 2845 2846 if conflict: 2847 if self._match_text_seq("ON", "CONSTRAINT"): 2848 constraint = self._parse_id_var() 2849 elif self._match(TokenType.L_PAREN): 2850 conflict_keys = self._parse_csv(self._parse_id_var) 2851 self._match_r_paren() 2852 2853 action = 
self._parse_var_from_options(self.CONFLICT_ACTIONS) 2854 if self._prev.token_type == TokenType.UPDATE: 2855 self._match(TokenType.SET) 2856 expressions = self._parse_csv(self._parse_equality) 2857 else: 2858 expressions = None 2859 2860 return self.expression( 2861 exp.OnConflict, 2862 duplicate=duplicate, 2863 expressions=expressions, 2864 action=action, 2865 conflict_keys=conflict_keys, 2866 constraint=constraint, 2867 where=self._parse_where(), 2868 ) 2869 2870 def _parse_returning(self) -> t.Optional[exp.Returning]: 2871 if not self._match(TokenType.RETURNING): 2872 return None 2873 return self.expression( 2874 exp.Returning, 2875 expressions=self._parse_csv(self._parse_expression), 2876 into=self._match(TokenType.INTO) and self._parse_table_part(), 2877 ) 2878 2879 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2880 if not self._match(TokenType.FORMAT): 2881 return None 2882 return self._parse_row_format() 2883 2884 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2885 index = self._index 2886 with_ = with_ or self._match_text_seq("WITH") 2887 2888 if not self._match(TokenType.SERDE_PROPERTIES): 2889 self._retreat(index) 2890 return None 2891 return self.expression( 2892 exp.SerdeProperties, 2893 **{ # type: ignore 2894 "expressions": self._parse_wrapped_properties(), 2895 "with": with_, 2896 }, 2897 ) 2898 2899 def _parse_row_format( 2900 self, match_row: bool = False 2901 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2902 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2903 return None 2904 2905 if self._match_text_seq("SERDE"): 2906 this = self._parse_string() 2907 2908 serde_properties = self._parse_serde_properties() 2909 2910 return self.expression( 2911 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2912 ) 2913 2914 self._match_text_seq("DELIMITED") 2915 2916 kwargs = {} 2917 2918 
if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2919 kwargs["fields"] = self._parse_string() 2920 if self._match_text_seq("ESCAPED", "BY"): 2921 kwargs["escaped"] = self._parse_string() 2922 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2923 kwargs["collection_items"] = self._parse_string() 2924 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2925 kwargs["map_keys"] = self._parse_string() 2926 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2927 kwargs["lines"] = self._parse_string() 2928 if self._match_text_seq("NULL", "DEFINED", "AS"): 2929 kwargs["null"] = self._parse_string() 2930 2931 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2932 2933 def _parse_load(self) -> exp.LoadData | exp.Command: 2934 if self._match_text_seq("DATA"): 2935 local = self._match_text_seq("LOCAL") 2936 self._match_text_seq("INPATH") 2937 inpath = self._parse_string() 2938 overwrite = self._match(TokenType.OVERWRITE) 2939 self._match_pair(TokenType.INTO, TokenType.TABLE) 2940 2941 return self.expression( 2942 exp.LoadData, 2943 this=self._parse_table(schema=True), 2944 local=local, 2945 overwrite=overwrite, 2946 inpath=inpath, 2947 partition=self._parse_partition(), 2948 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2949 serde=self._match_text_seq("SERDE") and self._parse_string(), 2950 ) 2951 return self._parse_as_command(self._prev) 2952 2953 def _parse_delete(self) -> exp.Delete: 2954 # This handles MySQL's "Multiple-Table Syntax" 2955 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2956 tables = None 2957 if not self._match(TokenType.FROM, advance=False): 2958 tables = self._parse_csv(self._parse_table) or None 2959 2960 returning = self._parse_returning() 2961 2962 return self.expression( 2963 exp.Delete, 2964 tables=tables, 2965 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2966 using=self._match(TokenType.USING) and 
self._parse_table(joins=True), 2967 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2968 where=self._parse_where(), 2969 returning=returning or self._parse_returning(), 2970 limit=self._parse_limit(), 2971 ) 2972 2973 def _parse_update(self) -> exp.Update: 2974 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2975 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2976 returning = self._parse_returning() 2977 return self.expression( 2978 exp.Update, 2979 **{ # type: ignore 2980 "this": this, 2981 "expressions": expressions, 2982 "from": self._parse_from(joins=True), 2983 "where": self._parse_where(), 2984 "returning": returning or self._parse_returning(), 2985 "order": self._parse_order(), 2986 "limit": self._parse_limit(), 2987 }, 2988 ) 2989 2990 def _parse_use(self) -> exp.Use: 2991 return self.expression( 2992 exp.Use, 2993 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 2994 this=self._parse_table(schema=False), 2995 ) 2996 2997 def _parse_uncache(self) -> exp.Uncache: 2998 if not self._match(TokenType.TABLE): 2999 self.raise_error("Expecting TABLE after UNCACHE") 3000 3001 return self.expression( 3002 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3003 ) 3004 3005 def _parse_cache(self) -> exp.Cache: 3006 lazy = self._match_text_seq("LAZY") 3007 self._match(TokenType.TABLE) 3008 table = self._parse_table(schema=True) 3009 3010 options = [] 3011 if self._match_text_seq("OPTIONS"): 3012 self._match_l_paren() 3013 k = self._parse_string() 3014 self._match(TokenType.EQ) 3015 v = self._parse_string() 3016 options = [k, v] 3017 self._match_r_paren() 3018 3019 self._match(TokenType.ALIAS) 3020 return self.expression( 3021 exp.Cache, 3022 this=table, 3023 lazy=lazy, 3024 options=options, 3025 expression=self._parse_select(nested=True), 3026 ) 3027 3028 def _parse_partition(self) -> t.Optional[exp.Partition]: 3029 if not 
self._match_texts(self.PARTITION_KEYWORDS): 3030 return None 3031 3032 return self.expression( 3033 exp.Partition, 3034 subpartition=self._prev.text.upper() == "SUBPARTITION", 3035 expressions=self._parse_wrapped_csv(self._parse_assignment), 3036 ) 3037 3038 def _parse_value(self) -> t.Optional[exp.Tuple]: 3039 def _parse_value_expression() -> t.Optional[exp.Expression]: 3040 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3041 return exp.var(self._prev.text.upper()) 3042 return self._parse_expression() 3043 3044 if self._match(TokenType.L_PAREN): 3045 expressions = self._parse_csv(_parse_value_expression) 3046 self._match_r_paren() 3047 return self.expression(exp.Tuple, expressions=expressions) 3048 3049 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3050 expression = self._parse_expression() 3051 if expression: 3052 return self.expression(exp.Tuple, expressions=[expression]) 3053 return None 3054 3055 def _parse_projections(self) -> t.List[exp.Expression]: 3056 return self._parse_expressions() 3057 3058 def _parse_select( 3059 self, 3060 nested: bool = False, 3061 table: bool = False, 3062 parse_subquery_alias: bool = True, 3063 parse_set_operation: bool = True, 3064 ) -> t.Optional[exp.Expression]: 3065 cte = self._parse_with() 3066 3067 if cte: 3068 this = self._parse_statement() 3069 3070 if not this: 3071 self.raise_error("Failed to parse any statement following CTE") 3072 return cte 3073 3074 if "with" in this.arg_types: 3075 this.set("with", cte) 3076 else: 3077 self.raise_error(f"{this.key} does not support CTE") 3078 this = cte 3079 3080 return this 3081 3082 # duckdb supports leading with FROM x 3083 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3084 3085 if self._match(TokenType.SELECT): 3086 comments = self._prev_comments 3087 3088 hint = self._parse_hint() 3089 3090 if self._next and not self._next.token_type == TokenType.DOT: 3091 all_ = 
self._match(TokenType.ALL) 3092 distinct = self._match_set(self.DISTINCT_TOKENS) 3093 else: 3094 all_, distinct = None, None 3095 3096 kind = ( 3097 self._match(TokenType.ALIAS) 3098 and self._match_texts(("STRUCT", "VALUE")) 3099 and self._prev.text.upper() 3100 ) 3101 3102 if distinct: 3103 distinct = self.expression( 3104 exp.Distinct, 3105 on=self._parse_value() if self._match(TokenType.ON) else None, 3106 ) 3107 3108 if all_ and distinct: 3109 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3110 3111 operation_modifiers = [] 3112 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3113 operation_modifiers.append(exp.var(self._prev.text.upper())) 3114 3115 limit = self._parse_limit(top=True) 3116 projections = self._parse_projections() 3117 3118 this = self.expression( 3119 exp.Select, 3120 kind=kind, 3121 hint=hint, 3122 distinct=distinct, 3123 expressions=projections, 3124 limit=limit, 3125 operation_modifiers=operation_modifiers or None, 3126 ) 3127 this.comments = comments 3128 3129 into = self._parse_into() 3130 if into: 3131 this.set("into", into) 3132 3133 if not from_: 3134 from_ = self._parse_from() 3135 3136 if from_: 3137 this.set("from", from_) 3138 3139 this = self._parse_query_modifiers(this) 3140 elif (table or nested) and self._match(TokenType.L_PAREN): 3141 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3142 this = self._parse_simplified_pivot( 3143 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3144 ) 3145 elif self._match(TokenType.FROM): 3146 from_ = self._parse_from(skip_from_token=True) 3147 # Support parentheses for duckdb FROM-first syntax 3148 select = self._parse_select() 3149 if select: 3150 select.set("from", from_) 3151 this = select 3152 else: 3153 this = exp.select("*").from_(t.cast(exp.From, from_)) 3154 else: 3155 this = ( 3156 self._parse_table() 3157 if table 3158 else self._parse_select(nested=True, parse_set_operation=False) 3159 ) 3160 3161 # Transform exp.Values into a 
exp.Table to pass through parse_query_modifiers 3162 # in case a modifier (e.g. join) is following 3163 if table and isinstance(this, exp.Values) and this.alias: 3164 alias = this.args["alias"].pop() 3165 this = exp.Table(this=this, alias=alias) 3166 3167 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3168 3169 self._match_r_paren() 3170 3171 # We return early here so that the UNION isn't attached to the subquery by the 3172 # following call to _parse_set_operations, but instead becomes the parent node 3173 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3174 elif self._match(TokenType.VALUES, advance=False): 3175 this = self._parse_derived_table_values() 3176 elif from_: 3177 this = exp.select("*").from_(from_.this, copy=False) 3178 elif self._match(TokenType.SUMMARIZE): 3179 table = self._match(TokenType.TABLE) 3180 this = self._parse_select() or self._parse_string() or self._parse_table() 3181 return self.expression(exp.Summarize, this=this, table=table) 3182 elif self._match(TokenType.DESCRIBE): 3183 this = self._parse_describe() 3184 elif self._match_text_seq("STREAM"): 3185 this = self._parse_function() 3186 if this: 3187 this = self.expression(exp.Stream, this=this) 3188 else: 3189 self._retreat(self._index - 1) 3190 else: 3191 this = None 3192 3193 return self._parse_set_operations(this) if parse_set_operation else this 3194 3195 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3196 self._match_text_seq("SEARCH") 3197 3198 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3199 3200 if not kind: 3201 return None 3202 3203 self._match_text_seq("FIRST", "BY") 3204 3205 return self.expression( 3206 exp.RecursiveWithSearch, 3207 kind=kind, 3208 this=self._parse_id_var(), 3209 expression=self._match_text_seq("SET") and self._parse_id_var(), 3210 using=self._match_text_seq("USING") and self._parse_id_var(), 3211 ) 3212 3213 def _parse_with(self, 
    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a single CTE (``alias [AS] (statement)``) inside a WITH clause.

        Raises if the alias is missing; returns None (after rewinding) when no
        AS follows the alias and the dialect requires one.
        """
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            # No AS keyword and the dialect doesn't allow omitting it — rewind
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            # Normalize a bare VALUES CTE into SELECT * FROM (VALUES ...) AS _values
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte
section tries to parse the clause version and if it fails, it treats the token 3282 # as an identifier (alias) 3283 if self._can_parse_limit_or_offset(): 3284 return None 3285 3286 any_token = self._match(TokenType.ALIAS) 3287 alias = ( 3288 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3289 or self._parse_string_as_identifier() 3290 ) 3291 3292 index = self._index 3293 if self._match(TokenType.L_PAREN): 3294 columns = self._parse_csv(self._parse_function_parameter) 3295 self._match_r_paren() if columns else self._retreat(index) 3296 else: 3297 columns = None 3298 3299 if not alias and not columns: 3300 return None 3301 3302 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3303 3304 # We bubble up comments from the Identifier to the TableAlias 3305 if isinstance(alias, exp.Identifier): 3306 table_alias.add_comments(alias.pop_comments()) 3307 3308 return table_alias 3309 3310 def _parse_subquery( 3311 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3312 ) -> t.Optional[exp.Subquery]: 3313 if not this: 3314 return None 3315 3316 return self.expression( 3317 exp.Subquery, 3318 this=this, 3319 pivots=self._parse_pivots(), 3320 alias=self._parse_table_alias() if parse_alias else None, 3321 sample=self._parse_table_sample(), 3322 ) 3323 3324 def _implicit_unnests_to_explicit(self, this: E) -> E: 3325 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3326 3327 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3328 for i, join in enumerate(this.args.get("joins") or []): 3329 table = join.this 3330 normalized_table = table.copy() 3331 normalized_table.meta["maybe_column"] = True 3332 normalized_table = _norm(normalized_table, dialect=self.dialect) 3333 3334 if isinstance(table, exp.Table) and not join.args.get("on"): 3335 if normalized_table.parts[0].name in refs: 3336 table_as_column = table.to_column() 3337 unnest = 
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/LIMIT, ...) to *this*.

        Modifiers are only collected for queries and tables; the implicit
        UNNEST rewrite at the end applies whenever the dialect supports it
        and *this* has a FROM clause.
        """
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT may carry an embedded offset; promote it
                            # to a proper Offset node on the parent
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # LIMIT ... BY expressions belong on the Offset
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
t.Optional[exp.Hint]: 3397 start_index = self._index 3398 should_fallback_to_string = False 3399 3400 hints = [] 3401 try: 3402 for hint in iter( 3403 lambda: self._parse_csv( 3404 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3405 ), 3406 [], 3407 ): 3408 hints.extend(hint) 3409 except ParseError: 3410 should_fallback_to_string = True 3411 3412 if should_fallback_to_string or self._curr: 3413 self._retreat(start_index) 3414 return self._parse_hint_fallback_to_string() 3415 3416 return self.expression(exp.Hint, expressions=hints) 3417 3418 def _parse_hint(self) -> t.Optional[exp.Hint]: 3419 if self._match(TokenType.HINT) and self._prev_comments: 3420 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3421 3422 return None 3423 3424 def _parse_into(self) -> t.Optional[exp.Into]: 3425 if not self._match(TokenType.INTO): 3426 return None 3427 3428 temp = self._match(TokenType.TEMPORARY) 3429 unlogged = self._match_text_seq("UNLOGGED") 3430 self._match(TokenType.TABLE) 3431 3432 return self.expression( 3433 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3434 ) 3435 3436 def _parse_from( 3437 self, joins: bool = False, skip_from_token: bool = False 3438 ) -> t.Optional[exp.From]: 3439 if not skip_from_token and not self._match(TokenType.FROM): 3440 return None 3441 3442 return self.expression( 3443 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3444 ) 3445 3446 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3447 return self.expression( 3448 exp.MatchRecognizeMeasure, 3449 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3450 this=self._parse_expression(), 3451 ) 3452 3453 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3454 if not self._match(TokenType.MATCH_RECOGNIZE): 3455 return None 3456 3457 self._match_l_paren() 3458 3459 partition = self._parse_partition_by() 3460 
order = self._parse_order() 3461 3462 measures = ( 3463 self._parse_csv(self._parse_match_recognize_measure) 3464 if self._match_text_seq("MEASURES") 3465 else None 3466 ) 3467 3468 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3469 rows = exp.var("ONE ROW PER MATCH") 3470 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3471 text = "ALL ROWS PER MATCH" 3472 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3473 text += " SHOW EMPTY MATCHES" 3474 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3475 text += " OMIT EMPTY MATCHES" 3476 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3477 text += " WITH UNMATCHED ROWS" 3478 rows = exp.var(text) 3479 else: 3480 rows = None 3481 3482 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3483 text = "AFTER MATCH SKIP" 3484 if self._match_text_seq("PAST", "LAST", "ROW"): 3485 text += " PAST LAST ROW" 3486 elif self._match_text_seq("TO", "NEXT", "ROW"): 3487 text += " TO NEXT ROW" 3488 elif self._match_text_seq("TO", "FIRST"): 3489 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3490 elif self._match_text_seq("TO", "LAST"): 3491 text += f" TO LAST {self._advance_any().text}" # type: ignore 3492 after = exp.var(text) 3493 else: 3494 after = None 3495 3496 if self._match_text_seq("PATTERN"): 3497 self._match_l_paren() 3498 3499 if not self._curr: 3500 self.raise_error("Expecting )", self._curr) 3501 3502 paren = 1 3503 start = self._curr 3504 3505 while self._curr and paren > 0: 3506 if self._curr.token_type == TokenType.L_PAREN: 3507 paren += 1 3508 if self._curr.token_type == TokenType.R_PAREN: 3509 paren -= 1 3510 3511 end = self._prev 3512 self._advance() 3513 3514 if paren > 0: 3515 self.raise_error("Expecting )", self._curr) 3516 3517 pattern = exp.var(self._find_sql(start, end)) 3518 else: 3519 pattern = None 3520 3521 define = ( 3522 self._parse_csv(self._parse_name_as_expression) 3523 if self._match_text_seq("DEFINE") 3524 else None 3525 ) 3526 3527 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs.

        ``cross_apply`` is True for CROSS APPLY, False for OUTER APPLY and
        None for plain LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: accept UNNEST, a function call, or a plain name,
            # optionally followed by dotted references
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
and self._prev, 3596 self._match_set(self.JOIN_SIDES) and self._prev, 3597 self._match_set(self.JOIN_KINDS) and self._prev, 3598 ) 3599 3600 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3601 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3602 this = self._parse_column() 3603 if isinstance(this, exp.Column): 3604 return this.this 3605 return this 3606 3607 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3608 3609 def _parse_join( 3610 self, skip_join_token: bool = False, parse_bracket: bool = False 3611 ) -> t.Optional[exp.Join]: 3612 if self._match(TokenType.COMMA): 3613 table = self._try_parse(self._parse_table) 3614 if table: 3615 return self.expression(exp.Join, this=table) 3616 return None 3617 3618 index = self._index 3619 method, side, kind = self._parse_join_parts() 3620 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3621 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3622 3623 if not skip_join_token and not join: 3624 self._retreat(index) 3625 kind = None 3626 method = None 3627 side = None 3628 3629 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3630 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3631 3632 if not skip_join_token and not join and not outer_apply and not cross_apply: 3633 return None 3634 3635 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3636 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3637 kwargs["expressions"] = self._parse_csv( 3638 lambda: self._parse_table(parse_bracket=parse_bracket) 3639 ) 3640 3641 if method: 3642 kwargs["method"] = method.text 3643 if side: 3644 kwargs["side"] = side.text 3645 if kind: 3646 kwargs["kind"] = kind.text 3647 if hint: 3648 kwargs["hint"] = hint 3649 3650 if self._match(TokenType.MATCH_CONDITION): 3651 kwargs["match_condition"] = 
self._parse_wrapped(self._parse_comparison) 3652 3653 if self._match(TokenType.ON): 3654 kwargs["on"] = self._parse_assignment() 3655 elif self._match(TokenType.USING): 3656 kwargs["using"] = self._parse_using_identifiers() 3657 elif ( 3658 not (outer_apply or cross_apply) 3659 and not isinstance(kwargs["this"], exp.Unnest) 3660 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3661 ): 3662 index = self._index 3663 joins: t.Optional[list] = list(self._parse_joins()) 3664 3665 if joins and self._match(TokenType.ON): 3666 kwargs["on"] = self._parse_assignment() 3667 elif joins and self._match(TokenType.USING): 3668 kwargs["using"] = self._parse_using_identifiers() 3669 else: 3670 joins = None 3671 self._retreat(index) 3672 3673 kwargs["this"].set("joins", joins if joins else None) 3674 3675 comments = [c for token in (method, side, kind) if token for c in token.comments] 3676 return self.expression(exp.Join, comments=comments, **kwargs) 3677 3678 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3679 this = self._parse_assignment() 3680 3681 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3682 return this 3683 3684 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3685 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3686 3687 return this 3688 3689 def _parse_index_params(self) -> exp.IndexParameters: 3690 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3691 3692 if self._match(TokenType.L_PAREN, advance=False): 3693 columns = self._parse_wrapped_csv(self._parse_with_operator) 3694 else: 3695 columns = None 3696 3697 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3698 partition_by = self._parse_partition_by() 3699 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3700 tablespace = ( 3701 self._parse_var(any_token=True) 3702 if self._match_text_seq("USING", "INDEX", 
"TABLESPACE") 3703 else None 3704 ) 3705 where = self._parse_where() 3706 3707 on = self._parse_field() if self._match(TokenType.ON) else None 3708 3709 return self.expression( 3710 exp.IndexParameters, 3711 using=using, 3712 columns=columns, 3713 include=include, 3714 partition_by=partition_by, 3715 where=where, 3716 with_storage=with_storage, 3717 tablespace=tablespace, 3718 on=on, 3719 ) 3720 3721 def _parse_index( 3722 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3723 ) -> t.Optional[exp.Index]: 3724 if index or anonymous: 3725 unique = None 3726 primary = None 3727 amp = None 3728 3729 self._match(TokenType.ON) 3730 self._match(TokenType.TABLE) # hive 3731 table = self._parse_table_parts(schema=True) 3732 else: 3733 unique = self._match(TokenType.UNIQUE) 3734 primary = self._match_text_seq("PRIMARY") 3735 amp = self._match_text_seq("AMP") 3736 3737 if not self._match(TokenType.INDEX): 3738 return None 3739 3740 index = self._parse_id_var() 3741 table = None 3742 3743 params = self._parse_index_params() 3744 3745 return self.expression( 3746 exp.Index, 3747 this=index, 3748 table=table, 3749 unique=unique, 3750 primary=primary, 3751 amp=amp, 3752 params=params, 3753 ) 3754 3755 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3756 hints: t.List[exp.Expression] = [] 3757 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3758 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3759 hints.append( 3760 self.expression( 3761 exp.WithTableHint, 3762 expressions=self._parse_csv( 3763 lambda: self._parse_function() or self._parse_var(any_token=True) 3764 ), 3765 ) 3766 ) 3767 self._match_r_paren() 3768 else: 3769 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3770 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3771 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3772 3773 self._match_set((TokenType.INDEX, TokenType.KEY)) 3774 if 
self._match(TokenType.FOR): 3775 hint.set("target", self._advance_any() and self._prev.text.upper()) 3776 3777 hint.set("expressions", self._parse_wrapped_id_vars()) 3778 hints.append(hint) 3779 3780 return hints or None 3781 3782 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3783 return ( 3784 (not schema and self._parse_function(optional_parens=False)) 3785 or self._parse_id_var(any_token=False) 3786 or self._parse_string_as_identifier() 3787 or self._parse_placeholder() 3788 ) 3789 3790 def _parse_table_parts( 3791 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3792 ) -> exp.Table: 3793 catalog = None 3794 db = None 3795 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3796 3797 while self._match(TokenType.DOT): 3798 if catalog: 3799 # This allows nesting the table in arbitrarily many dot expressions if needed 3800 table = self.expression( 3801 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3802 ) 3803 else: 3804 catalog = db 3805 db = table 3806 # "" used for tsql FROM a..b case 3807 table = self._parse_table_part(schema=schema) or "" 3808 3809 if ( 3810 wildcard 3811 and self._is_connected() 3812 and (isinstance(table, exp.Identifier) or not table) 3813 and self._match(TokenType.STAR) 3814 ): 3815 if isinstance(table, exp.Identifier): 3816 table.args["this"] += "*" 3817 else: 3818 table = exp.Identifier(this="*") 3819 3820 # We bubble up comments from the Identifier to the Table 3821 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3822 3823 if is_db_reference: 3824 catalog = db 3825 db = table 3826 table = None 3827 3828 if not table and not is_db_reference: 3829 self.raise_error(f"Expected table name but got {self._curr}") 3830 if not db and is_db_reference: 3831 self.raise_error(f"Expected database name but got {self._curr}") 3832 3833 table = self.expression( 3834 exp.Table, 3835 comments=comments, 3836 
this=table, 3837 db=db, 3838 catalog=catalog, 3839 ) 3840 3841 changes = self._parse_changes() 3842 if changes: 3843 table.set("changes", changes) 3844 3845 at_before = self._parse_historical_data() 3846 if at_before: 3847 table.set("when", at_before) 3848 3849 pivots = self._parse_pivots() 3850 if pivots: 3851 table.set("pivots", pivots) 3852 3853 return table 3854 3855 def _parse_table( 3856 self, 3857 schema: bool = False, 3858 joins: bool = False, 3859 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3860 parse_bracket: bool = False, 3861 is_db_reference: bool = False, 3862 parse_partition: bool = False, 3863 ) -> t.Optional[exp.Expression]: 3864 lateral = self._parse_lateral() 3865 if lateral: 3866 return lateral 3867 3868 unnest = self._parse_unnest() 3869 if unnest: 3870 return unnest 3871 3872 values = self._parse_derived_table_values() 3873 if values: 3874 return values 3875 3876 subquery = self._parse_select(table=True) 3877 if subquery: 3878 if not subquery.args.get("pivots"): 3879 subquery.set("pivots", self._parse_pivots()) 3880 return subquery 3881 3882 bracket = parse_bracket and self._parse_bracket(None) 3883 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3884 3885 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3886 self._parse_table 3887 ) 3888 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3889 3890 only = self._match(TokenType.ONLY) 3891 3892 this = t.cast( 3893 exp.Expression, 3894 bracket 3895 or rows_from 3896 or self._parse_bracket( 3897 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3898 ), 3899 ) 3900 3901 if only: 3902 this.set("only", only) 3903 3904 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3905 self._match_text_seq("*") 3906 3907 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3908 if parse_partition and self._match(TokenType.PARTITION, 
advance=False): 3909 this.set("partition", self._parse_partition()) 3910 3911 if schema: 3912 return self._parse_schema(this=this) 3913 3914 version = self._parse_version() 3915 3916 if version: 3917 this.set("version", version) 3918 3919 if self.dialect.ALIAS_POST_TABLESAMPLE: 3920 this.set("sample", self._parse_table_sample()) 3921 3922 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3923 if alias: 3924 this.set("alias", alias) 3925 3926 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3927 return self.expression( 3928 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3929 ) 3930 3931 this.set("hints", self._parse_table_hints()) 3932 3933 if not this.args.get("pivots"): 3934 this.set("pivots", self._parse_pivots()) 3935 3936 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3937 this.set("sample", self._parse_table_sample()) 3938 3939 if joins: 3940 for join in self._parse_joins(): 3941 this.append("joins", join) 3942 3943 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3944 this.set("ordinality", True) 3945 this.set("alias", self._parse_table_alias()) 3946 3947 return this 3948 3949 def _parse_version(self) -> t.Optional[exp.Version]: 3950 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3951 this = "TIMESTAMP" 3952 elif self._match(TokenType.VERSION_SNAPSHOT): 3953 this = "VERSION" 3954 else: 3955 return None 3956 3957 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3958 kind = self._prev.text.upper() 3959 start = self._parse_bitwise() 3960 self._match_texts(("TO", "AND")) 3961 end = self._parse_bitwise() 3962 expression: t.Optional[exp.Expression] = self.expression( 3963 exp.Tuple, expressions=[start, end] 3964 ) 3965 elif self._match_text_seq("CONTAINED", "IN"): 3966 kind = "CONTAINED IN" 3967 expression = self.expression( 3968 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3969 ) 3970 elif self._match(TokenType.ALL): 3971 kind = "ALL" 3972 
expression = None 3973 else: 3974 self._match_text_seq("AS", "OF") 3975 kind = "AS OF" 3976 expression = self._parse_type() 3977 3978 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3979 3980 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3981 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3982 index = self._index 3983 historical_data = None 3984 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3985 this = self._prev.text.upper() 3986 kind = ( 3987 self._match(TokenType.L_PAREN) 3988 and self._match_texts(self.HISTORICAL_DATA_KIND) 3989 and self._prev.text.upper() 3990 ) 3991 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3992 3993 if expression: 3994 self._match_r_paren() 3995 historical_data = self.expression( 3996 exp.HistoricalData, this=this, kind=kind, expression=expression 3997 ) 3998 else: 3999 self._retreat(index) 4000 4001 return historical_data 4002 4003 def _parse_changes(self) -> t.Optional[exp.Changes]: 4004 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4005 return None 4006 4007 information = self._parse_var(any_token=True) 4008 self._match_r_paren() 4009 4010 return self.expression( 4011 exp.Changes, 4012 information=information, 4013 at_before=self._parse_historical_data(), 4014 end=self._parse_historical_data(), 4015 ) 4016 4017 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4018 if not self._match(TokenType.UNNEST): 4019 return None 4020 4021 expressions = self._parse_wrapped_csv(self._parse_equality) 4022 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4023 4024 alias = self._parse_table_alias() if with_alias else None 4025 4026 if alias: 4027 if self.dialect.UNNEST_COLUMN_ONLY: 4028 if alias.args.get("columns"): 4029 self.raise_error("Unexpected extra column alias in unnest.") 4030 4031 alias.set("columns", [alias.this]) 4032 alias.set("this", None) 4033 4034 columns = 
alias.args.get("columns") or [] 4035 if offset and len(expressions) < len(columns): 4036 offset = columns.pop() 4037 4038 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4039 self._match(TokenType.ALIAS) 4040 offset = self._parse_id_var( 4041 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4042 ) or exp.to_identifier("offset") 4043 4044 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4045 4046 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4047 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4048 if not is_derived and not ( 4049 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4050 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4051 ): 4052 return None 4053 4054 expressions = self._parse_csv(self._parse_value) 4055 alias = self._parse_table_alias() 4056 4057 if is_derived: 4058 self._match_r_paren() 4059 4060 return self.expression( 4061 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4062 ) 4063 4064 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4065 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4066 as_modifier and self._match_text_seq("USING", "SAMPLE") 4067 ): 4068 return None 4069 4070 bucket_numerator = None 4071 bucket_denominator = None 4072 bucket_field = None 4073 percent = None 4074 size = None 4075 seed = None 4076 4077 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4078 matched_l_paren = self._match(TokenType.L_PAREN) 4079 4080 if self.TABLESAMPLE_CSV: 4081 num = None 4082 expressions = self._parse_csv(self._parse_primary) 4083 else: 4084 expressions = None 4085 num = ( 4086 self._parse_factor() 4087 if self._match(TokenType.NUMBER, advance=False) 4088 else self._parse_primary() or self._parse_placeholder() 4089 ) 4090 4091 if self._match_text_seq("BUCKET"): 4092 bucket_numerator = self._parse_number() 4093 
self._match_text_seq("OUT", "OF") 4094 bucket_denominator = bucket_denominator = self._parse_number() 4095 self._match(TokenType.ON) 4096 bucket_field = self._parse_field() 4097 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4098 percent = num 4099 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4100 size = num 4101 else: 4102 percent = num 4103 4104 if matched_l_paren: 4105 self._match_r_paren() 4106 4107 if self._match(TokenType.L_PAREN): 4108 method = self._parse_var(upper=True) 4109 seed = self._match(TokenType.COMMA) and self._parse_number() 4110 self._match_r_paren() 4111 elif self._match_texts(("SEED", "REPEATABLE")): 4112 seed = self._parse_wrapped(self._parse_number) 4113 4114 if not method and self.DEFAULT_SAMPLING_METHOD: 4115 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4116 4117 return self.expression( 4118 exp.TableSample, 4119 expressions=expressions, 4120 method=method, 4121 bucket_numerator=bucket_numerator, 4122 bucket_denominator=bucket_denominator, 4123 bucket_field=bucket_field, 4124 percent=percent, 4125 size=size, 4126 seed=seed, 4127 ) 4128 4129 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4130 return list(iter(self._parse_pivot, None)) or None 4131 4132 def _parse_joins(self) -> t.Iterator[exp.Join]: 4133 return iter(self._parse_join, None) 4134 4135 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4136 if not self._match(TokenType.INTO): 4137 return None 4138 4139 return self.expression( 4140 exp.UnpivotColumns, 4141 this=self._match_text_seq("NAME") and self._parse_column(), 4142 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4143 ) 4144 4145 # https://duckdb.org/docs/sql/statements/pivot 4146 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4147 def _parse_on() -> t.Optional[exp.Expression]: 4148 this = self._parse_bitwise() 4149 4150 if self._match(TokenType.IN): 4151 # PIVOT ... 
ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        # Parses the `<col> IN (<v1> [AS a1], ...)` (or `IN (ANY [ORDER BY ...])`)
        # part of a PIVOT clause, raising on a missing `IN (`.
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            # One IN-list entry, optionally aliased: `expr [AS] alias`.
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                # A bare column with no db qualifier is treated as a plain identifier alias.
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        # Parses a full `PIVOT (...)` / `UNPIVOT [INCLUDE|EXCLUDE NULLS] (...)` clause;
        # returns None (after retreating) when the tokens don't form one.
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause - undo the PIVOT/UNPIVOT token consumption.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        # Only attach an alias if another PIVOT/UNPIVOT doesn't immediately follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot
            # (one per aggregation x IN-list value combination).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            pivot_field_expressions = pivot.args["field"].expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                for fld in pivot_field_expressions:
                    field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for name in names:
                        # Dialect flag decides whether the aggregation alias prefixes
                        # or suffixes the field value in the generated column name.
                        if self.PREFIXED_PIVOT_COLUMNS:
                            name = f"{name}_{field_name}" if name else field_name
                        else:
                            name = f"{field_name}_{name}" if name else field_name

                        columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default naming strategy: one name per aggregation, from its alias (may be "").
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        # ClickHouse-style PREWHERE; `skip_where_token` lets callers that already
        # consumed the keyword reuse this method.
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        # Parses `WHERE <condition>`, preserving comments attached to the keyword.
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        # Parses GROUP BY, collecting plain expressions plus any ROLLUP / CUBE /
        # GROUPING SETS / TOTALS modifiers into `elements`.
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if 
self._match(TokenType.ROLLUP): 4323 elements["rollup"].append( 4324 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4325 ) 4326 elif self._match(TokenType.CUBE): 4327 elements["cube"].append( 4328 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4329 ) 4330 elif self._match(TokenType.GROUPING_SETS): 4331 elements["grouping_sets"].append( 4332 self.expression( 4333 exp.GroupingSets, 4334 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4335 ) 4336 ) 4337 elif self._match_text_seq("TOTALS"): 4338 elements["totals"] = True # type: ignore 4339 4340 if before_with_index <= self._index <= before_with_index + 1: 4341 self._retreat(before_with_index) 4342 break 4343 4344 if index == self._index: 4345 break 4346 4347 return self.expression(exp.Group, **elements) # type: ignore 4348 4349 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4350 return self.expression( 4351 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4352 ) 4353 4354 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4355 if self._match(TokenType.L_PAREN): 4356 grouping_set = self._parse_csv(self._parse_column) 4357 self._match_r_paren() 4358 return self.expression(exp.Tuple, expressions=grouping_set) 4359 4360 return self._parse_column() 4361 4362 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4363 if not skip_having_token and not self._match(TokenType.HAVING): 4364 return None 4365 return self.expression(exp.Having, this=self._parse_assignment()) 4366 4367 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4368 if not self._match(TokenType.QUALIFY): 4369 return None 4370 return self.expression(exp.Qualify, this=self._parse_assignment()) 4371 4372 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4373 if skip_start_token: 4374 start = None 4375 elif self._match(TokenType.START_WITH): 4376 start = 
self._parse_assignment() 4377 else: 4378 return None 4379 4380 self._match(TokenType.CONNECT_BY) 4381 nocycle = self._match_text_seq("NOCYCLE") 4382 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4383 exp.Prior, this=self._parse_bitwise() 4384 ) 4385 connect = self._parse_assignment() 4386 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4387 4388 if not start and self._match(TokenType.START_WITH): 4389 start = self._parse_assignment() 4390 4391 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4392 4393 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4394 this = self._parse_id_var(any_token=True) 4395 if self._match(TokenType.ALIAS): 4396 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4397 return this 4398 4399 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4400 if self._match_text_seq("INTERPOLATE"): 4401 return self._parse_wrapped_csv(self._parse_name_as_expression) 4402 return None 4403 4404 def _parse_order( 4405 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4406 ) -> t.Optional[exp.Expression]: 4407 siblings = None 4408 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4409 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4410 return this 4411 4412 siblings = True 4413 4414 return self.expression( 4415 exp.Order, 4416 this=this, 4417 expressions=self._parse_csv(self._parse_ordered), 4418 siblings=siblings, 4419 ) 4420 4421 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4422 if not self._match(token): 4423 return None 4424 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4425 4426 def _parse_ordered( 4427 self, parse_method: t.Optional[t.Callable] = None 4428 ) -> t.Optional[exp.Ordered]: 4429 this = parse_method() if parse_method else self._parse_assignment() 4430 if not this: 4431 return None 4432 4433 if this.name.upper() 
== "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4434 this = exp.var("ALL") 4435 4436 asc = self._match(TokenType.ASC) 4437 desc = self._match(TokenType.DESC) or (asc and False) 4438 4439 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4440 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4441 4442 nulls_first = is_nulls_first or False 4443 explicitly_null_ordered = is_nulls_first or is_nulls_last 4444 4445 if ( 4446 not explicitly_null_ordered 4447 and ( 4448 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4449 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4450 ) 4451 and self.dialect.NULL_ORDERING != "nulls_are_last" 4452 ): 4453 nulls_first = True 4454 4455 if self._match_text_seq("WITH", "FILL"): 4456 with_fill = self.expression( 4457 exp.WithFill, 4458 **{ # type: ignore 4459 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4460 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4461 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4462 "interpolate": self._parse_interpolate(), 4463 }, 4464 ) 4465 else: 4466 with_fill = None 4467 4468 return self.expression( 4469 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4470 ) 4471 4472 def _parse_limit_options(self) -> exp.LimitOptions: 4473 percent = self._match(TokenType.PERCENT) 4474 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4475 self._match_text_seq("ONLY") 4476 with_ties = self._match_text_seq("WITH", "TIES") 4477 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4478 4479 def _parse_limit( 4480 self, 4481 this: t.Optional[exp.Expression] = None, 4482 top: bool = False, 4483 skip_limit_token: bool = False, 4484 ) -> t.Optional[exp.Expression]: 4485 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4486 comments = self._prev_comments 4487 if top: 4488 limit_paren = self._match(TokenType.L_PAREN) 4489 expression = 
self._parse_term() if limit_paren else self._parse_number() 4490 4491 if limit_paren: 4492 self._match_r_paren() 4493 4494 limit_options = self._parse_limit_options() 4495 else: 4496 limit_options = None 4497 expression = self._parse_term() 4498 4499 if self._match(TokenType.COMMA): 4500 offset = expression 4501 expression = self._parse_term() 4502 else: 4503 offset = None 4504 4505 limit_exp = self.expression( 4506 exp.Limit, 4507 this=this, 4508 expression=expression, 4509 offset=offset, 4510 comments=comments, 4511 limit_options=limit_options, 4512 expressions=self._parse_limit_by(), 4513 ) 4514 4515 return limit_exp 4516 4517 if self._match(TokenType.FETCH): 4518 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4519 direction = self._prev.text.upper() if direction else "FIRST" 4520 4521 count = self._parse_field(tokens=self.FETCH_TOKENS) 4522 4523 return self.expression( 4524 exp.Fetch, 4525 direction=direction, 4526 count=count, 4527 limit_options=self._parse_limit_options(), 4528 ) 4529 4530 return this 4531 4532 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4533 if not self._match(TokenType.OFFSET): 4534 return this 4535 4536 count = self._parse_term() 4537 self._match_set((TokenType.ROW, TokenType.ROWS)) 4538 4539 return self.expression( 4540 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4541 ) 4542 4543 def _can_parse_limit_or_offset(self) -> bool: 4544 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4545 return False 4546 4547 index = self._index 4548 result = bool( 4549 self._try_parse(self._parse_limit, retreat=True) 4550 or self._try_parse(self._parse_offset, retreat=True) 4551 ) 4552 self._retreat(index) 4553 return result 4554 4555 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4556 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4557 4558 def _parse_locks(self) -> t.List[exp.Lock]: 4559 locks 
= [] 4560 while True: 4561 if self._match_text_seq("FOR", "UPDATE"): 4562 update = True 4563 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4564 "LOCK", "IN", "SHARE", "MODE" 4565 ): 4566 update = False 4567 else: 4568 break 4569 4570 expressions = None 4571 if self._match_text_seq("OF"): 4572 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4573 4574 wait: t.Optional[bool | exp.Expression] = None 4575 if self._match_text_seq("NOWAIT"): 4576 wait = True 4577 elif self._match_text_seq("WAIT"): 4578 wait = self._parse_primary() 4579 elif self._match_text_seq("SKIP", "LOCKED"): 4580 wait = False 4581 4582 locks.append( 4583 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4584 ) 4585 4586 return locks 4587 4588 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4589 while this and self._match_set(self.SET_OPERATIONS): 4590 token_type = self._prev.token_type 4591 4592 if token_type == TokenType.UNION: 4593 operation: t.Type[exp.SetOperation] = exp.Union 4594 elif token_type == TokenType.EXCEPT: 4595 operation = exp.Except 4596 else: 4597 operation = exp.Intersect 4598 4599 comments = self._prev.comments 4600 4601 if self._match(TokenType.DISTINCT): 4602 distinct: t.Optional[bool] = True 4603 elif self._match(TokenType.ALL): 4604 distinct = False 4605 else: 4606 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4607 if distinct is None: 4608 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4609 4610 by_name = self._match_text_seq("BY", "NAME") 4611 expression = self._parse_select(nested=True, parse_set_operation=False) 4612 4613 this = self.expression( 4614 operation, 4615 comments=comments, 4616 this=this, 4617 distinct=distinct, 4618 by_name=by_name, 4619 expression=expression, 4620 ) 4621 4622 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4623 expression = this.expression 4624 4625 if 
expression: 4626 for arg in self.SET_OP_MODIFIERS: 4627 expr = expression.args.get(arg) 4628 if expr: 4629 this.set(arg, expr.pop()) 4630 4631 return this 4632 4633 def _parse_expression(self) -> t.Optional[exp.Expression]: 4634 return self._parse_alias(self._parse_assignment()) 4635 4636 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4637 this = self._parse_disjunction() 4638 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4639 # This allows us to parse <non-identifier token> := <expr> 4640 this = exp.column( 4641 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4642 ) 4643 4644 while self._match_set(self.ASSIGNMENT): 4645 if isinstance(this, exp.Column) and len(this.parts) == 1: 4646 this = this.this 4647 4648 this = self.expression( 4649 self.ASSIGNMENT[self._prev.token_type], 4650 this=this, 4651 comments=self._prev_comments, 4652 expression=self._parse_assignment(), 4653 ) 4654 4655 return this 4656 4657 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4658 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4659 4660 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4661 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4662 4663 def _parse_equality(self) -> t.Optional[exp.Expression]: 4664 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4665 4666 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4667 return self._parse_tokens(self._parse_range, self.COMPARISON) 4668 4669 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4670 this = this or self._parse_bitwise() 4671 negate = self._match(TokenType.NOT) 4672 4673 if self._match_set(self.RANGE_PARSERS): 4674 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4675 if not expression: 4676 return this 4677 4678 this = expression 4679 elif self._match(TokenType.ISNULL): 4680 this = self.expression(exp.Is, this=this, 
            expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # x NOTNULL is canonicalized to NOT (x IS NULL)
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap `this` in a NOT node; returns `this` unchanged when it's falsy."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of an IS predicate.

        Handles IS [NOT] DISTINCT FROM, IS [NOT] JSON [...] and IS [NOT] <primary|NULL>.
        Retreats and returns None when nothing valid follows IS.
        """
        # Position of the IS token itself, so we can retreat past it on failure
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # e.g. IS JSON [VALUE|ARRAY|OBJECT|SCALAR] [WITH|WITHOUT] [UNIQUE] [KEYS]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the IN operand: UNNEST(...), a parenthesized/bracketed list or
        subquery, or a bare column (e.g. Teradata's `x IN col`)."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query becomes the `query` arg; anything else is a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `<low> AND <high>` after BETWEEN has been consumed."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional `ESCAPE '<char>'` clause to `this`."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression into the canonical INTERVAL '<value>' <unit> form.

        When `match_interval` is False, the INTERVAL keyword is assumed to have
        already been consumed (used for interval sums). Returns an exp.Add when
        multiple interval terms are chained, otherwise an exp.Interval, or None.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bail on things like `interval IS NULL`, where `interval` is a column
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise-level operators (self.BITWISE,
        ||, ??, << and >>) on top of terms."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # a ?? b  ->  COALESCE(a, b)
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of term-level operators (self.TERM) on
        top of factors, with special handling for COLLATE operands."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of factor-level operators (self.FACTOR),
        tagging divisions with the dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator (e.g. DIV) with no right operand was actually an
            # identifier/alias, not an operator - undo the match
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse a chain of exponentiation operators (self.EXPONENT) over unaries."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator if present, otherwise a type-level expression
        with an optional AT TIME ZONE suffix."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an expression at "type" precedence: an interval, a cast-like
        `<type> <literal>` construct, or fall through to a column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # e.g. DATE '2024-01-01' - dialects may install a dedicated parser
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data type parameter, e.g. the 38 in DECIMAL(38, 0),
        normalizing bare columns (such as MAX) into uppercase vars."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type.

        Returns a DataType (or PseudoType / ObjectIdentifier / Cast for inline
        constructors) node, or None - restoring the token position - when the
        upcoming tokens don't form a type. `check_func` guards against treating
        function calls like DATE('...') as types; `schema` enables fixed-size
        array parsing; `allow_identifiers` permits identifier-named types (UDTs).
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Maybe an identifier that names a type (quoted type or a UDT)
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # Nullable(T) collapses to T with the nullable flag set
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Might still be a function call, e.g. DATE(...) - resolved below
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # BQ inline constructor values, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # Disambiguate type-with-params from a function call: only a trailing
            # string literal confirms a type literal here, otherwise retreat fully
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one `name [:] type [constraints]` entry inside STRUCT<...>."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # What we parsed wasn't a type after all - reparse as a bare type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional AT TIME ZONE clause to `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference followed by any column-level operators,
        marking Oracle-style (+) join markers when the dialect supports them."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field and wrap bare identifiers into Column nodes."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES used as a plain identifier, not the VALUES (...) construct
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks-style `col:path.to.field::type` VARIANT
        extraction into a JSONExtract, hoisting any trailing casts above it."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this
    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type operand of a `::` cast."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators (casts, dots, brackets, etc.) on `this`,
        rebuilding Column/Dot chains as qualification deepens."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: x.y.z -> catalog=x, db=y, table=z
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (merging adjacent string
        literals into Concat), a leading-dot number, or a parenthesized
        expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/var.

        `anonymous_func` flips the primary/function precedence so that
        function-shaped input is tried first.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, additionally supporting the ODBC-style
        `{fn <function>}` escape sequence wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Resolution order: no-paren function parsers, no-paren functions,
        dedicated FUNCTION_PARSERS, subquery predicates, then known builders
        from `functions` (defaulting to self.FUNCTIONS), falling back to
        exp.Anonymous. `anonymous` forces the Anonymous fallback; `any_token`
        loosens which tokens may start a function name. Returns None when the
        upcoming tokens cannot be a function call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to rewrite positional args into PropertyEQ; base
        implementation returns the expression unchanged."""
        return expression
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style function arguments (aliases, equalities)
        into PropertyEQ nodes, delegating the rest to _to_prop_eq."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Keys are identifiers, not columns - unwrap
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        """Parse a UDF body; base implementation accepts any statement."""
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name + type)."""
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: qualified name plus optional parameter list."""
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); falls back to a plain
        Identifier when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally kind-qualified
        (e.g. @@GLOBAL.var -> kind=GLOBAL)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter name."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`), or fall back to a DISTINCT
        clause / plain expression with optional ordering-style modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda - rewind and parse as a regular (aggregate) argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into a Schema; returns
        `this` unchanged when the parens belong to a query instead."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single field definition (name plus column def suffixes)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
# --- continuation of _parse_column_def: collect remaining column constraints ---
            if not constraint:
                break
            constraints.append(constraint)

        # A bare identifier with no type and no constraints is not a column def
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with `(start, increment)` or
        `START <n> INCREMENT <n>`; only a full start+increment pair produces an
        identity constraint."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds the already-consumed AUTO on mismatch."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse COMPRESS, either with a wrapped list of values or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse `GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY [(opts)] | ROW ... | (<expr>)}`."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # System-versioned temporal table: GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) — a computed column, not an identity
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # T-SQL shorthand: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following a consumed NOT (NULL / CASESPECIFIC /
        FOR REPLICATION); rewinds the NOT when nothing matches."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Guard against T-SQL procedure options (WITH <option>) being mistaken for
        # the start of a constraint
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones are restricted to
        SCHEMA_UNNAMED_CONSTRAINTS."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch an unnamed constraint keyword through CONSTRAINT_PARSERS.

        `constraints` optionally restricts which keywords are accepted here, while the
        actual parser is still looked up in CONSTRAINT_PARSERS.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
# --- body of _parse_unique (its def line precedes this chunk) ---
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings:
        `ON <event> <action>` clauses plus keywords from KEY_CONSTRAINT_OPTIONS."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse `REFERENCES <table> [options]`; `match=False` assumes REFERENCES was
        already consumed by the caller."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE: `expressions` is intentionally always None here; referenced columns are
        # carried by the schema parsed as part of `this` (table with schema=True)
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... [ON DELETE|UPDATE <action> ...]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word actions such as CASCADE / RESTRICT
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse `PERIOD FOR SYSTEM_TIME (start_col, end_col)`; rewinds on mismatch."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint (no parenthesized column
        list) or as a table constraint with a column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` after `this`: subscripts, array/struct literals, or
        ODBC datetime literals. Recurses to support chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Plain subscript: normalize indexes by the dialect's index offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `ELSE interval END` can be mis-parsed as an Interval whose unit is END;
            # recover by treating it as the column "interval"
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF — either function-style `IF(cond, true, false)` or statement-style
        `IF cond THEN ... [ELSE ...] END`. Continues in the next chunk."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is a command (e.g. T-SQL IF ... ) — parse it opaquely
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
# --- continuation of _parse_if (statement-style branch) ---
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse `NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]`; rewinds the
        consumed NEXT on mismatch."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <t>, ...) — the leading TABLE keyword is optional."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt] ...).

        `strict` selects Cast vs TryCast; `safe` is forwarded onto the resulting node.
        Temporal casts with a FORMAT clause are canonicalized to StrToDate/StrToTime.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: CAST(expr, 'type string')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        # Oracle: CAST(expr AS type DEFAULT <value> ON CONVERSION ERROR)
        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Rewrite as a string-to-date/time conversion using the dialect's
                # time-format mappings
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG-style aggregates into GroupConcat, covering the
        Postgres, BigQuery and Trino variants. Continues in the next chunk."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
# --- continuation of _parse_string_agg: WITHIN GROUP handling ---
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a Cast/TryCast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE([XMLNAMESPACES(...),] 'xpath' [PASSING ...] [COLUMNS ...])."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse the comma-separated entries of an XMLNAMESPACES(...) clause."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also treat NULL == NULL as a match, since
                # DECODE considers two NULLs equal
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a `[KEY] k <sep> [VALUE] v` pair (as used by JSON_OBJECT)."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
# --- implementation shared by the two _parse_json_object overloads above ---
    def _parse_json_object(self, agg=False):
        """Parse the interior of JSON_OBJECT / JSON_OBJECTAGG (`agg` selects which)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a [COLUMNS] (<json column defs>) clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [ON ERROR/EMPTY handling] COLUMNS ...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('expr' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) column list: name type ['path'] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls; `haystack_first` flips the comma-form
        argument order, and `needle IN haystack` is always supported."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT(MODEL <m>, TABLE <t> [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR with no FROM: the start position defaults to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] string [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or TRIM_PATTERN_FIRST dialects) the first operand is
            # the character set and the second is the target string, so swap.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause's comma-separated named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single `<name> AS (<window spec>)` definition."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX|MIN <column>` qualifier, if present."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # Anything other than MIN (i.e. MAX, or a missing keyword) means max
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function decorations that may follow `this`.

        Handles WITHIN GROUP, FILTER (WHERE ...), IGNORE/RESPECT NULLS, and
        the OVER (...) / named-window specification. With `alias=True` it
        parses a named window definition (`name AS (...)`) instead of OVER.
        Returns `this` unchanged when no window syntax follows.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the nested IGNORE/RESPECT NULLS so it wraps the whole
                # aggregate rather than one of its arguments.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a previously named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame boundary (e.g. `UNBOUNDED PRECEDING`)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or alias list) following `this`.

        With `explicit=True` an alias is only consumed when introduced by the
        AS keyword; otherwise a bare identifier also counts as an alias.
        """
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or treat the next token as one if allowed."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens if requested) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()
    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, else any token as a variable."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, else any token as a variable."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter name into an exp.Parameter node."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a bind-parameter placeholder; rewind if the parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The matched token did not yield a placeholder — undo the match
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star modifier such as EXCEPT/REPLACE with its expressions."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items using `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments that preceded the separator to the last item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; error if '(' is required."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or an (optionally aliased) assignment expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            # Each mode is a run of VAR tokens, modes are comma-separated
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string or table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] <col def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP [COLUMN] action inside ALTER TABLE."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when no kind was given
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of an ALTER TABLE statement.

        Handles ADD CONSTRAINT lists, dialects that allow a single ADD keyword
        for multiple columns, ADD COLUMNS (<schema>), and per-column ADDs.
        """
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse the ALTER action of an ALTER TABLE (column-level changes)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER ... DISTSTYLE {ALL|EVEN|AUTO|KEY DISTKEY <col>}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())
    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER ... [COMPOUND] SORTKEY {(<cols>)|AUTO|NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of ALTER TABLE (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO <new table name>]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the SET action of ALTER TABLE (properties, location, tags, ...)."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Fallback: [SERDE <name>] followed by generic properties
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement, falling back to a raw Command.

        Dispatches the action keyword through ALTER_PARSERS; if the statement
        cannot be fully consumed it is preserved verbatim as exp.Command.
        """
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an Alter node when every token was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse an ANALYZE statement across dialect variants.

        Consumes optional style options, the analyzed object (table, index,
        tables-in-db, database, or cluster), an optional partition, a
        StarRocks sync/async mode, an inner expression (e.g. statistics
        sub-clauses), and trailing properties.
        """
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            # Bare ANALYZE with no arguments
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # Recover the SYNC/ASYNC keyword, which sits two tokens back
            mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )
expressions = [ 7354 self.expression( 7355 exp.AnalyzeSample, 7356 sample=sample, 7357 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7358 ) 7359 ] 7360 7361 return self.expression( 7362 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7363 ) 7364 7365 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7366 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7367 kind = None 7368 this = None 7369 expression: t.Optional[exp.Expression] = None 7370 if self._match_text_seq("REF", "UPDATE"): 7371 kind = "REF" 7372 this = "UPDATE" 7373 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7374 this = "UPDATE SET DANGLING TO NULL" 7375 elif self._match_text_seq("STRUCTURE"): 7376 kind = "STRUCTURE" 7377 if self._match_text_seq("CASCADE", "FAST"): 7378 this = "CASCADE FAST" 7379 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7380 ("ONLINE", "OFFLINE") 7381 ): 7382 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7383 expression = self._parse_into() 7384 7385 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7386 7387 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7388 this = self._prev.text.upper() 7389 if self._match_text_seq("COLUMNS"): 7390 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7391 return None 7392 7393 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7394 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7395 if self._match_text_seq("STATISTICS"): 7396 return self.expression(exp.AnalyzeDelete, kind=kind) 7397 return None 7398 7399 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7400 if self._match_text_seq("CHAINED", "ROWS"): 7401 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7402 return None 7403 7404 # 
    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse UPDATE/DROP HISTOGRAM ON <cols> with its WITH/USING options."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE|TARGET] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False if absent
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else keep it as a Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)
self._match_text_seq("TRANSACTION"): 7528 return self._parse_set_transaction(global_=kind == "GLOBAL") 7529 7530 left = self._parse_primary() or self._parse_column() 7531 assignment_delimiter = self._match_texts(("=", "TO")) 7532 7533 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7534 self._retreat(index) 7535 return None 7536 7537 right = self._parse_statement() or self._parse_id_var() 7538 if isinstance(right, (exp.Column, exp.Identifier)): 7539 right = exp.var(right.name) 7540 7541 this = self.expression(exp.EQ, this=left, expression=right) 7542 return self.expression(exp.SetItem, this=this, kind=kind) 7543 7544 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7545 self._match_text_seq("TRANSACTION") 7546 characteristics = self._parse_csv( 7547 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7548 ) 7549 return self.expression( 7550 exp.SetItem, 7551 expressions=characteristics, 7552 kind="TRANSACTION", 7553 **{"global": global_}, # type: ignore 7554 ) 7555 7556 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7557 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7558 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7559 7560 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7561 index = self._index 7562 set_ = self.expression( 7563 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7564 ) 7565 7566 if self._curr: 7567 self._retreat(index) 7568 return self._parse_as_command(self._prev) 7569 7570 return set_ 7571 7572 def _parse_var_from_options( 7573 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7574 ) -> t.Optional[exp.Var]: 7575 start = self._curr 7576 if not start: 7577 return None 7578 7579 option = start.text.upper() 7580 continuations = options.get(option) 7581 7582 index = self._index 7583 self._advance() 7584 for keywords in continuations or 
[]: 7585 if isinstance(keywords, str): 7586 keywords = (keywords,) 7587 7588 if self._match_text_seq(*keywords): 7589 option = f"{option} {' '.join(keywords)}" 7590 break 7591 else: 7592 if continuations or continuations is None: 7593 if raise_unmatched: 7594 self.raise_error(f"Unknown option {option}") 7595 7596 self._retreat(index) 7597 return None 7598 7599 return exp.var(option) 7600 7601 def _parse_as_command(self, start: Token) -> exp.Command: 7602 while self._curr: 7603 self._advance() 7604 text = self._find_sql(start, self._prev) 7605 size = len(start.text) 7606 self._warn_unsupported() 7607 return exp.Command(this=text[:size], expression=text[size:]) 7608 7609 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7610 settings = [] 7611 7612 self._match_l_paren() 7613 kind = self._parse_id_var() 7614 7615 if self._match(TokenType.L_PAREN): 7616 while True: 7617 key = self._parse_id_var() 7618 value = self._parse_primary() 7619 if not key and value is None: 7620 break 7621 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7622 self._match(TokenType.R_PAREN) 7623 7624 self._match_r_paren() 7625 7626 return self.expression( 7627 exp.DictProperty, 7628 this=this, 7629 kind=kind.this if kind else None, 7630 settings=settings, 7631 ) 7632 7633 def _parse_dict_range(self, this: str) -> exp.DictRange: 7634 self._match_l_paren() 7635 has_min = self._match_text_seq("MIN") 7636 if has_min: 7637 min = self._parse_var() or self._parse_primary() 7638 self._match_text_seq("MAX") 7639 max = self._parse_var() or self._parse_primary() 7640 else: 7641 max = self._parse_var() or self._parse_primary() 7642 min = exp.Literal.number(0) 7643 self._match_r_paren() 7644 return self.expression(exp.DictRange, this=this, min=min, max=max) 7645 7646 def _parse_comprehension( 7647 self, this: t.Optional[exp.Expression] 7648 ) -> t.Optional[exp.Comprehension]: 7649 index = self._index 7650 expression = self._parse_column() 7651 if not 
self._match(TokenType.IN): 7652 self._retreat(index - 1) 7653 return None 7654 iterator = self._parse_column() 7655 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7656 return self.expression( 7657 exp.Comprehension, 7658 this=this, 7659 expression=expression, 7660 iterator=iterator, 7661 condition=condition, 7662 ) 7663 7664 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7665 if self._match(TokenType.HEREDOC_STRING): 7666 return self.expression(exp.Heredoc, this=self._prev.text) 7667 7668 if not self._match_text_seq("$"): 7669 return None 7670 7671 tags = ["$"] 7672 tag_text = None 7673 7674 if self._is_connected(): 7675 self._advance() 7676 tags.append(self._prev.text.upper()) 7677 else: 7678 self.raise_error("No closing $ found") 7679 7680 if tags[-1] != "$": 7681 if self._is_connected() and self._match_text_seq("$"): 7682 tag_text = tags[-1] 7683 tags.append("$") 7684 else: 7685 self.raise_error("No closing $ found") 7686 7687 heredoc_start = self._curr 7688 7689 while self._curr: 7690 if self._match_text_seq(*tags, advance=False): 7691 this = self._find_sql(heredoc_start, self._prev) 7692 self._advance(len(tags)) 7693 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7694 7695 self._advance() 7696 7697 self.raise_error(f"No closing {''.join(tags)} found") 7698 return None 7699 7700 def _find_parser( 7701 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7702 ) -> t.Optional[t.Callable]: 7703 if not self._curr: 7704 return None 7705 7706 index = self._index 7707 this = [] 7708 while True: 7709 # The current token might be multiple words 7710 curr = self._curr.text.upper() 7711 key = curr.split(" ") 7712 this.append(curr) 7713 7714 self._advance() 7715 result, trie = in_trie(trie, key) 7716 if result == TrieResult.FAILED: 7717 break 7718 7719 if result == TrieResult.EXISTS: 7720 subparser = parsers[" ".join(this)] 7721 return subparser 7722 7723 self._retreat(index) 7724 return None 7725 7726 def _match(self, 
token_type, advance=True, expression=None): 7727 if not self._curr: 7728 return None 7729 7730 if self._curr.token_type == token_type: 7731 if advance: 7732 self._advance() 7733 self._add_comments(expression) 7734 return True 7735 7736 return None 7737 7738 def _match_set(self, types, advance=True): 7739 if not self._curr: 7740 return None 7741 7742 if self._curr.token_type in types: 7743 if advance: 7744 self._advance() 7745 return True 7746 7747 return None 7748 7749 def _match_pair(self, token_type_a, token_type_b, advance=True): 7750 if not self._curr or not self._next: 7751 return None 7752 7753 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7754 if advance: 7755 self._advance(2) 7756 return True 7757 7758 return None 7759 7760 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7761 if not self._match(TokenType.L_PAREN, expression=expression): 7762 self.raise_error("Expecting (") 7763 7764 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7765 if not self._match(TokenType.R_PAREN, expression=expression): 7766 self.raise_error("Expecting )") 7767 7768 def _match_texts(self, texts, advance=True): 7769 if ( 7770 self._curr 7771 and self._curr.token_type != TokenType.STRING 7772 and self._curr.text.upper() in texts 7773 ): 7774 if advance: 7775 self._advance() 7776 return True 7777 return None 7778 7779 def _match_text_seq(self, *texts, advance=True): 7780 index = self._index 7781 for text in texts: 7782 if ( 7783 self._curr 7784 and self._curr.token_type != TokenType.STRING 7785 and self._curr.text.upper() == text 7786 ): 7787 self._advance() 7788 else: 7789 self._retreat(index) 7790 return None 7791 7792 if not advance: 7793 self._retreat(index) 7794 7795 return True 7796 7797 def _replace_lambda( 7798 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7799 ) -> t.Optional[exp.Expression]: 7800 if not node: 7801 return node 7802 7803 
lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7804 7805 for column in node.find_all(exp.Column): 7806 typ = lambda_types.get(column.parts[0].name) 7807 if typ is not None: 7808 dot_or_id = column.to_dot() if column.table else column.this 7809 7810 if typ: 7811 dot_or_id = self.expression( 7812 exp.Cast, 7813 this=dot_or_id, 7814 to=typ, 7815 ) 7816 7817 parent = column.parent 7818 7819 while isinstance(parent, exp.Dot): 7820 if not isinstance(parent.parent, exp.Dot): 7821 parent.replace(dot_or_id) 7822 break 7823 parent = parent.parent 7824 else: 7825 if column is node: 7826 node = dot_or_id 7827 else: 7828 column.replace(dot_or_id) 7829 return node 7830 7831 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7832 start = self._prev 7833 7834 # Not to be confused with TRUNCATE(number, decimals) function call 7835 if self._match(TokenType.L_PAREN): 7836 self._retreat(self._index - 2) 7837 return self._parse_function() 7838 7839 # Clickhouse supports TRUNCATE DATABASE as well 7840 is_database = self._match(TokenType.DATABASE) 7841 7842 self._match(TokenType.TABLE) 7843 7844 exists = self._parse_exists(not_=False) 7845 7846 expressions = self._parse_csv( 7847 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7848 ) 7849 7850 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7851 7852 if self._match_text_seq("RESTART", "IDENTITY"): 7853 identity = "RESTART" 7854 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7855 identity = "CONTINUE" 7856 else: 7857 identity = None 7858 7859 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7860 option = self._prev.text 7861 else: 7862 option = None 7863 7864 partition = self._parse_partition() 7865 7866 # Fallback case 7867 if self._curr: 7868 return self._parse_as_command(start) 7869 7870 return self.expression( 7871 exp.TruncateTable, 7872 expressions=expressions, 7873 is_database=is_database, 7874 
exists=exists, 7875 cluster=cluster, 7876 identity=identity, 7877 option=option, 7878 partition=partition, 7879 ) 7880 7881 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7882 this = self._parse_ordered(self._parse_opclass) 7883 7884 if not self._match(TokenType.WITH): 7885 return this 7886 7887 op = self._parse_var(any_token=True) 7888 7889 return self.expression(exp.WithOperator, this=this, op=op) 7890 7891 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7892 self._match(TokenType.EQ) 7893 self._match(TokenType.L_PAREN) 7894 7895 opts: t.List[t.Optional[exp.Expression]] = [] 7896 while self._curr and not self._match(TokenType.R_PAREN): 7897 if self._match_text_seq("FORMAT_NAME", "="): 7898 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7899 # so we parse it separately to use _parse_field() 7900 prop = self.expression( 7901 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7902 ) 7903 opts.append(prop) 7904 else: 7905 opts.append(self._parse_property()) 7906 7907 self._match(TokenType.COMMA) 7908 7909 return opts 7910 7911 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7912 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7913 7914 options = [] 7915 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7916 option = self._parse_var(any_token=True) 7917 prev = self._prev.text.upper() 7918 7919 # Different dialects might separate options and values by white space, "=" and "AS" 7920 self._match(TokenType.EQ) 7921 self._match(TokenType.ALIAS) 7922 7923 param = self.expression(exp.CopyParameter, this=option) 7924 7925 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7926 TokenType.L_PAREN, advance=False 7927 ): 7928 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7929 param.set("expressions", self._parse_wrapped_options()) 7930 elif prev == "FILE_FORMAT": 7931 # T-SQL's external file format case 7932 
param.set("expression", self._parse_field()) 7933 else: 7934 param.set("expression", self._parse_unquoted_field()) 7935 7936 options.append(param) 7937 self._match(sep) 7938 7939 return options 7940 7941 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7942 expr = self.expression(exp.Credentials) 7943 7944 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7945 expr.set("storage", self._parse_field()) 7946 if self._match_text_seq("CREDENTIALS"): 7947 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7948 creds = ( 7949 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7950 ) 7951 expr.set("credentials", creds) 7952 if self._match_text_seq("ENCRYPTION"): 7953 expr.set("encryption", self._parse_wrapped_options()) 7954 if self._match_text_seq("IAM_ROLE"): 7955 expr.set("iam_role", self._parse_field()) 7956 if self._match_text_seq("REGION"): 7957 expr.set("region", self._parse_field()) 7958 7959 return expr 7960 7961 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7962 return self._parse_field() 7963 7964 def _parse_copy(self) -> exp.Copy | exp.Command: 7965 start = self._prev 7966 7967 self._match(TokenType.INTO) 7968 7969 this = ( 7970 self._parse_select(nested=True, parse_subquery_alias=False) 7971 if self._match(TokenType.L_PAREN, advance=False) 7972 else self._parse_table(schema=True) 7973 ) 7974 7975 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7976 7977 files = self._parse_csv(self._parse_file_location) 7978 credentials = self._parse_credentials() 7979 7980 self._match_text_seq("WITH") 7981 7982 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7983 7984 # Fallback case 7985 if self._curr: 7986 return self._parse_as_command(start) 7987 7988 return self.expression( 7989 exp.Copy, 7990 this=this, 7991 kind=kind, 7992 credentials=credentials, 7993 files=files, 7994 params=params, 7995 ) 7996 7997 def _parse_normalize(self) -> 
exp.Normalize: 7998 return self.expression( 7999 exp.Normalize, 8000 this=self._parse_bitwise(), 8001 form=self._match(TokenType.COMMA) and self._parse_var(), 8002 ) 8003 8004 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8005 args = self._parse_csv(lambda: self._parse_lambda()) 8006 8007 this = seq_get(args, 0) 8008 decimals = seq_get(args, 1) 8009 8010 return expr_type( 8011 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8012 ) 8013 8014 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8015 if self._match_text_seq("COLUMNS", "(", advance=False): 8016 this = self._parse_function() 8017 if isinstance(this, exp.Columns): 8018 this.set("unpack", True) 8019 return this 8020 8021 return self.expression( 8022 exp.Star, 8023 **{ # type: ignore 8024 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8025 "replace": self._parse_star_op("REPLACE"), 8026 "rename": self._parse_star_op("RENAME"), 8027 }, 8028 ) 8029 8030 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8031 privilege_parts = [] 8032 8033 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8034 # (end of privilege list) or L_PAREN (start of column list) are met 8035 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8036 privilege_parts.append(self._curr.text.upper()) 8037 self._advance() 8038 8039 this = exp.var(" ".join(privilege_parts)) 8040 expressions = ( 8041 self._parse_wrapped_csv(self._parse_column) 8042 if self._match(TokenType.L_PAREN, advance=False) 8043 else None 8044 ) 8045 8046 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8047 8048 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8049 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8050 principal = self._parse_id_var() 8051 8052 if not principal: 8053 return None 8054 8055 return self.expression(exp.GrantPrincipal, 
this=principal, kind=kind) 8056 8057 def _parse_grant(self) -> exp.Grant | exp.Command: 8058 start = self._prev 8059 8060 privileges = self._parse_csv(self._parse_grant_privilege) 8061 8062 self._match(TokenType.ON) 8063 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8064 8065 # Attempt to parse the securable e.g. MySQL allows names 8066 # such as "foo.*", "*.*" which are not easily parseable yet 8067 securable = self._try_parse(self._parse_table_parts) 8068 8069 if not securable or not self._match_text_seq("TO"): 8070 return self._parse_as_command(start) 8071 8072 principals = self._parse_csv(self._parse_grant_principal) 8073 8074 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8075 8076 if self._curr: 8077 return self._parse_as_command(start) 8078 8079 return self.expression( 8080 exp.Grant, 8081 privileges=privileges, 8082 kind=kind, 8083 securable=securable, 8084 principals=principals, 8085 grant_option=grant_option, 8086 ) 8087 8088 def _parse_overlay(self) -> exp.Overlay: 8089 return self.expression( 8090 exp.Overlay, 8091 **{ # type: ignore 8092 "this": self._parse_bitwise(), 8093 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8094 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8095 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8096 }, 8097 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1466 def __init__( 1467 self, 1468 error_level: t.Optional[ErrorLevel] = None, 1469 error_message_context: int = 100, 1470 max_errors: int = 3, 1471 dialect: DialectType = None, 1472 ): 1473 from sqlglot.dialects import Dialect 1474 1475 self.error_level = error_level or ErrorLevel.IMMEDIATE 1476 self.error_message_context = error_message_context 1477 self.max_errors = max_errors 1478 self.dialect = Dialect.get_or_raise(dialect) 1479 self.reset()
1491 def parse( 1492 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1493 ) -> t.List[t.Optional[exp.Expression]]: 1494 """ 1495 Parses a list of tokens and returns a list of syntax trees, one tree 1496 per parsed SQL statement. 1497 1498 Args: 1499 raw_tokens: The list of tokens. 1500 sql: The original SQL string, used to produce helpful debug messages. 1501 1502 Returns: 1503 The list of the produced syntax trees. 1504 """ 1505 return self._parse( 1506 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1507 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1509 def parse_into( 1510 self, 1511 expression_types: exp.IntoType, 1512 raw_tokens: t.List[Token], 1513 sql: t.Optional[str] = None, 1514 ) -> t.List[t.Optional[exp.Expression]]: 1515 """ 1516 Parses a list of tokens into a given Expression type. If a collection of Expression 1517 types is given instead, this method will try to parse the token list into each one 1518 of them, stopping at the first for which the parsing succeeds. 1519 1520 Args: 1521 expression_types: The expression type(s) to try and parse the token list into. 1522 raw_tokens: The list of tokens. 1523 sql: The original SQL string, used to produce helpful debug messages. 1524 1525 Returns: 1526 The target Expression. 1527 """ 1528 errors = [] 1529 for expression_type in ensure_list(expression_types): 1530 parser = self.EXPRESSION_PARSERS.get(expression_type) 1531 if not parser: 1532 raise TypeError(f"No parser registered for {expression_type}") 1533 1534 try: 1535 return self._parse(parser, raw_tokens, sql) 1536 except ParseError as e: 1537 e.errors[0]["into_expression"] = expression_type 1538 errors.append(e) 1539 1540 raise ParseError( 1541 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1542 errors=merge_errors(errors), 1543 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1583 def check_errors(self) -> None: 1584 """Logs or raises any found errors, depending on the chosen error level setting.""" 1585 if self.error_level == ErrorLevel.WARN: 1586 for error in self.errors: 1587 logger.error(str(error)) 1588 elif self.error_level == ErrorLevel.RAISE and self.errors: 1589 raise ParseError( 1590 concat_messages(self.errors, self.max_errors), 1591 errors=merge_errors(self.errors), 1592 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The description of the error.
            token: The token where the error happened; defaults to the current (or previous)
                token, falling back to an empty string token when no tokens are available.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        # token.end is an inclusive index, so extend by one for exclusive slicing.
        end = token.end + 1
        # Surround the offending SQL with up to error_message_context characters on each side.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The \033[4m/\033[0m ANSI codes underline the highlighted SQL fragment.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1622 def expression( 1623 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1624 ) -> E: 1625 """ 1626 Creates a new, validated Expression. 1627 1628 Args: 1629 exp_class: The expression class to instantiate. 1630 comments: An optional list of comments to attach to the expression. 1631 kwargs: The arguments to set for the expression along with their respective values. 1632 1633 Returns: 1634 The target expression. 1635 """ 1636 instance = exp_class(**kwargs) 1637 instance.add_comments(comments) if comments else self._add_comments(instance) 1638 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1645 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1646 """ 1647 Validates an Expression, making sure that all its mandatory arguments are set. 1648 1649 Args: 1650 expression: The expression to validate. 1651 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1652 1653 Returns: 1654 The validated expression. 1655 """ 1656 if self.error_level != ErrorLevel.IGNORE: 1657 for error_message in expression.error_messages(args): 1658 self.raise_error(error_message) 1659 1660 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.